def __init__(self, attn, checkpoint):
    super(SetupModel, self).__init__()
    self.data = Input()
    self.output = None
    self.saver = None
    self.best_saver = None
    self.train_gv_summaries_op = None
    self.val_init_op_list = None
    self.train_init_op_list = None
    self.train_op_list = None
    self.train_summaries_op = None
    self.val_op_list = None
    self.attn = attn
    self.checkpoint = checkpoint
def main():
    data = Input(split='train', mode='subt')
    model = Model(data)

    for v in tf.global_variables():
        print(v)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    # config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1

    with tf.Session(config=config) as sess:
        sess.run([model.data.initializer, tf.global_variables_initializer()],
                 feed_dict=data.feed_dict)
        # q, a, s = sess.run([model.ques_enc, model.ans_enc, model.subt_enc])
        # print(q.shape, a.shape, s.shape)
        # a, b, c, d = sess.run(model.tri_word_encodes)
        # print(a, b, c, d)
        # print(a.shape, b.shape, c.shape, d.shape)
        a, b = sess.run([model.subt, model.abs])
        print(a, b)
        print(a.shape, b.shape)
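# The Input pipeline class is assumed throughout these constructors; only its
# surface is visible here (constructor arguments, `initializer`, `gt`, `spec`,
# `feed_dict`, and `len()`). Below is a hypothetical stub consistent with that
# usage, for reference only -- member names, shapes, and sizes are guesses,
# not the project's real implementation, which presumably wraps tf.data.
import tensorflow as tf

class Input(object):
    def __init__(self, split='train', mode='subt'):
        dataset = tf.data.Dataset.from_tensor_slices({
            'gt': tf.zeros([128], dtype=tf.int64),    # ground-truth answer index
            'spec': tf.zeros([128], dtype=tf.int32),  # attention supervision labels
        })
        self._size = 128
        self._iterator = dataset.batch(16).make_initializable_iterator()
        batch = self._iterator.get_next()
        self.gt = batch['gt']
        self.spec = batch['spec']
        self.initializer = self._iterator.initializer  # run once per epoch
        self.feed_dict = {}  # placeholders filled at initialization time, if any

    def __len__(self):
        return self._size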
def __init__(self):
    if reset:
        if os.path.exists(self._checkpoint_dir):
            os.system('rm -rf %s' % self._checkpoint_dir)
        if os.path.exists(self._log_dir):
            os.system('rm -rf %s' % self._log_dir)
        if os.path.exists(self._attn_dir):
            os.system('rm -rf %s' % self._attn_dir)

    fu.make_dirs(os.path.join(self._checkpoint_dir, 'best'))
    fu.make_dirs(self._log_dir)
    fu.make_dirs(self._attn_dir)

    self.train_data = Input(split='train', mode=args.mode)
    self.val_data = Input(split='val', mode=args.mode)
    # self.test_data = TestInput()

    self.train_model = mod.Model(self.train_data, scale=hp['reg'], training=True)
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        self.val_model = mod.Model(self.val_data)
    # with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    #     self.test_model = mod.Model(self.test_data, training=False)

    for v in tf.trainable_variables():
        print(v)

    self.main_loss = mu.get_loss(hp['loss'], self.train_data.gt, self.train_model.output)
    self.regu_loss = tf.losses.get_regularization_loss()
    self.loss = 0
    self.loss += self.main_loss
    self.loss += self.regu_loss

    self.train_answer = tf.argmax(self.train_model.output, axis=1)
    self.train_accuracy, self.train_accuracy_update, self.train_accuracy_initializer = \
        mu.get_acc(self.train_data.gt, self.train_answer, name='train_accuracy')
    self.val_answer = tf.argmax(self.val_model.output, axis=1)
    self.val_accuracy, self.val_accuracy_update, self.val_accuracy_initializer = \
        mu.get_acc(self.val_data.gt, self.val_answer, name='val_accuracy')

    self.global_step = tf.train.get_or_create_global_step()
    decay_step = int(hp['decay_epoch'] * len(self.train_data))
    self.learning_rate = mu.get_lr(hp['decay_type'], hp['learning_rate'], self.global_step,
                                   decay_step, hp['decay_rate'])
    self.optimizer = mu.get_opt(hp['opt'], self.learning_rate, decay_step)

    # Split the gradients so language-side and visual-side variables can be
    # updated by separate train ops.
    grads_and_vars = self.optimizer.compute_gradients(self.loss)
    lang_grads_and_vars = [(grad, var) for grad, var in grads_and_vars if 'Visual' not in var.name]
    vis_grads_and_vars = [(grad, var) for grad, var in grads_and_vars if 'Visual' in var.name]

    self.update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(self.update_ops):
        self.lang_train_op = tf.group(
            self.optimizer.apply_gradients(lang_grads_and_vars, self.global_step),
            self.train_accuracy_update)
        self.vis_train_op = tf.group(
            self.optimizer.apply_gradients(vis_grads_and_vars, self.global_step),
            self.train_accuracy_update)

    self.saver = tf.train.Saver(tf.global_variables())
    self.best_saver = tf.train.Saver(tf.global_variables())

    if args.checkpoint:
        self.checkpoint_file = args.checkpoint
    else:
        self.checkpoint_file = tf.train.latest_checkpoint(self._checkpoint_dir)

    self.train_init_op_list = [self.train_data.initializer, self.train_accuracy_initializer]
    self.val_init_op_list = [self.val_data.initializer, self.val_accuracy_initializer]
    self.lang_train_op_list = [self.lang_train_op, self.loss, self.train_accuracy, self.global_step]
    self.vis_train_op_list = [self.vis_train_op, self.loss, self.train_accuracy, self.global_step]
    self.val_op_list = [self.val_accuracy, self.val_accuracy_update]

    if attn:
        self.lang_train_op_list += [self.train_model.attn, self.train_model.belief,
                                    self.train_data.gt, self.train_answer]
        self.vis_train_op_list += [self.train_model.attn, self.train_model.belief,
                                   self.train_data.gt, self.train_answer]
        self.val_op_list += [self.val_model.attn, self.val_model.belief,
                             self.val_data.gt, self.val_answer]
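# Plausible implementation of mu.get_acc, inferred from the constructor further
# below that builds the identical metric inline: a streaming tf.metrics.accuracy
# plus an initializer for its local counter variables, so the running accuracy
# can be reset at the start of each epoch. Sketch only; assumes
# `import tensorflow as tf` is in scope.
def get_acc(labels, predictions, name):
    accuracy, update_op = tf.metrics.accuracy(labels, predictions, name=name)
    initializer = tf.variables_initializer(
        tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope=name))
    return accuracy, update_op, initializer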
def __init__(self):
    if reset:
        if os.path.exists(self._checkpoint_dir):
            os.system('rm -rf %s' % self._checkpoint_dir)
        if os.path.exists(self._log_dir):
            os.system('rm -rf %s' % self._log_dir)

    fu.make_dirs(os.path.join(self._checkpoint_dir, 'best'))
    fu.make_dirs(self._log_dir)

    self.train_data = Input(split='train', mode=args.mode)
    self.val_data = Input(split='val', mode=args.mode)
    # self.test_data = TestInput()

    self.train_model = mod.Model(self.train_data, beta=hp['reg'], training=True)
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        self.val_model = mod.Model(self.val_data)
    # with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    #     self.test_model = mod.Model(self.test_data, training=False)

    for v in tf.trainable_variables():
        print(v)

    self.main_loss = mu.get_loss(hp['loss'], self.train_data.gt, self.train_model.output)
    # Standard non-saturating GAN losses: the discriminator separates real from
    # fake logits, while the generator is rewarded for fooling it.
    self.real_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        logits=self.train_model.real_logit, labels=tf.ones_like(self.train_model.real_logit)))
    self.fake_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        logits=self.train_model.fake_logit, labels=tf.zeros_like(self.train_model.fake_logit)))
    self.d_loss = self.real_loss + self.fake_loss
    self.g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        logits=self.train_model.fake_logit, labels=tf.ones_like(self.train_model.fake_logit)))
    self.regu_loss = tf.losses.get_regularization_loss()

    self.loss = self.g_loss
    self.loss += self.main_loss
    self.loss += self.regu_loss
    self.d_loss += self.regu_loss

    self.train_accuracy, self.train_accuracy_update, self.train_accuracy_initializer = \
        mu.get_acc(self.train_data.gt, tf.argmax(self.train_model.output, axis=1),
                   name='train_accuracy')
    self.val_accuracy, self.val_accuracy_update, self.val_accuracy_initializer = \
        mu.get_acc(self.val_data.gt, tf.argmax(self.val_model.output, axis=1),
                   name='val_accuracy')

    self.global_step = tf.train.get_or_create_global_step()
    decay_step = int(hp['decay_epoch'] * len(self.train_data))
    self.learning_rate = mu.get_lr(hp['decay_type'], hp['learning_rate'], self.global_step,
                                   decay_step, hp['decay_rate'])
    self.optimizer = mu.get_opt(hp['opt'], self.learning_rate, decay_step)
    self.d_optimizer = mu.get_opt(hp['opt'], self.learning_rate, decay_step)

    grads_and_vars = self.optimizer.compute_gradients(self.loss,
                                                      var_list=tf.trainable_variables('QA'))
    gradients, variables = list(zip(*grads_and_vars))

    self.update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(self.update_ops):
        self.train_op = tf.group(
            self.optimizer.apply_gradients(grads_and_vars),
            self.d_optimizer.minimize(self.d_loss, self.global_step,
                                      tf.trainable_variables('Discriminator')),
            self.train_accuracy_update)

    self.saver = tf.train.Saver(tf.global_variables())
    self.best_saver = tf.train.Saver(tf.global_variables())

    # Summary
    train_gv_summaries = []
    for idx, grad in enumerate(gradients):
        if grad is not None:
            train_gv_summaries.append(tf.summary.histogram('gradients/' + variables[idx].name, grad))
            train_gv_summaries.append(tf.summary.histogram(variables[idx].name, variables[idx]))

    train_summaries = [
        tf.summary.scalar('train_loss', self.loss),
        tf.summary.scalar('train_accuracy', self.train_accuracy),
        tf.summary.scalar('learning_rate', self.learning_rate)
    ]
    self.train_summaries_op = tf.summary.merge(train_summaries)
    self.train_gv_summaries_op = tf.summary.merge(train_gv_summaries + train_summaries)
    self.val_summaries_op = tf.summary.scalar('val_accuracy', self.val_accuracy)

    if args.checkpoint:
        self.checkpoint_file = args.checkpoint
    else:
        self.checkpoint_file = tf.train.latest_checkpoint(self._checkpoint_dir)

    self.train_init_op_list = [self.train_data.initializer, self.train_accuracy_initializer]
    self.val_init_op_list = [self.val_data.initializer, self.val_accuracy_initializer]
    self.train_op_list = [self.train_op, self.loss, self.train_accuracy, self.global_step]
    # self.run_metadata = tf.RunMetadata()
    self.val_op_list = [self.val_accuracy, self.val_accuracy_update, self.val_summaries_op]
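# Sketch (not part of this section) of a driver loop consuming the
# *_init_op_list / *_op_list members built above. `trainer` stands for an
# instance of one of these classes and `num_epochs` is illustrative; both are
# hypothetical names. Each epoch rewinds the data iterator and zeroes the
# streaming accuracy, then runs until the iterator is exhausted.
def run_training(trainer, num_epochs=10):
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        for _ in range(num_epochs):
            sess.run(trainer.train_init_op_list)  # rewind data, reset accuracy
            try:
                while True:
                    _, loss, acc, step = sess.run(trainer.train_op_list)
            except tf.errors.OutOfRangeError:
                pass  # end of the training epoch
            sess.run(trainer.val_init_op_list)
            try:
                while True:
                    val_acc, _, summary = sess.run(trainer.val_op_list)
            except tf.errors.OutOfRangeError:
                pass  # end of the validation pass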
def __init__(self):
    if reset:
        if os.path.exists(self._checkpoint_dir):
            os.system('rm -rf %s' % self._checkpoint_dir)
        if os.path.exists(self._log_dir):
            os.system('rm -rf %s' % self._log_dir)
        if os.path.exists(self._attn_dir):
            os.system('rm -rf %s' % self._attn_dir)

    fu.make_dirs(os.path.join(self._checkpoint_dir, 'best'))
    fu.make_dirs(self._log_dir)
    fu.make_dirs(self._attn_dir)

    self.train_data = Input(split='train', mode=args.mode)
    self.val_data = Input(split='val', mode=args.mode)
    # self.test_data = TestInput()

    self.train_model = mod.Model(self.train_data, scale=hp['reg'], training=True)
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        self.val_model = mod.Model(self.val_data)
    # with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    #     self.test_model = mod.Model(self.test_data, training=False)

    for v in tf.trainable_variables():
        print(v)

    self.main_loss = mu.get_loss(hp['loss'], self.train_data.gt, self.train_model.output)
    self.regu_loss = tf.losses.get_regularization_loss()
    self.loss = 0
    self.loss += self.main_loss
    self.loss += self.regu_loss

    self.train_answer = tf.argmax(self.train_model.output, axis=1)
    self.train_accuracy, self.train_accuracy_update, self.train_accuracy_initializer = \
        mu.get_acc(self.train_data.gt, self.train_answer, name='train_accuracy')
    self.val_answer = tf.argmax(self.val_model.output, axis=1)
    self.val_accuracy, self.val_accuracy_update, self.val_accuracy_initializer = \
        mu.get_acc(self.val_data.gt, self.val_answer, name='val_accuracy')

    self.global_step = tf.train.get_or_create_global_step()
    decay_step = int(hp['decay_epoch'] * len(self.train_data))
    self.learning_rate = mu.get_lr(hp['decay_type'], hp['learning_rate'], self.global_step,
                                   decay_step, hp['decay_rate'])
    self.optimizer = mu.get_opt(hp['opt'], self.learning_rate, decay_step)

    grads_and_vars = self.optimizer.compute_gradients(self.loss)
    # grads_and_vars = [(tf.clip_by_norm(grad, 0.01, axes=[0]), var) if grad is not None else (grad, var)
    #                   for grad, var in grads_and_vars]
    gradients, variables = list(zip(*grads_and_vars))

    self.update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(self.update_ops):
        self.train_op = tf.group(
            self.optimizer.apply_gradients(grads_and_vars, self.global_step),
            self.train_accuracy_update)

    self.saver = tf.train.Saver(tf.global_variables())
    self.best_saver = tf.train.Saver(tf.global_variables())

    # Summary
    train_gv_summaries = []
    for idx, grad in enumerate(gradients):
        if grad is not None:
            train_gv_summaries.append(tf.summary.histogram('gradients/' + variables[idx].name, grad))
            train_gv_summaries.append(tf.summary.histogram(variables[idx].name, variables[idx]))

    train_summaries = [
        tf.summary.scalar('train_loss', self.loss),
        tf.summary.scalar('train_accuracy', self.train_accuracy),
        tf.summary.scalar('learning_rate', self.learning_rate)
    ]
    self.train_summaries_op = tf.summary.merge(train_summaries)
    self.train_gv_summaries_op = tf.summary.merge(train_gv_summaries + train_summaries)
    self.val_summaries_op = tf.summary.scalar('val_accuracy', self.val_accuracy)

    if args.checkpoint:
        self.checkpoint_file = args.checkpoint
    else:
        self.checkpoint_file = tf.train.latest_checkpoint(self._checkpoint_dir)

    self.train_init_op_list = [self.train_data.initializer, self.train_accuracy_initializer]
    self.val_init_op_list = [self.val_data.initializer, self.val_accuracy_initializer]
    self.train_op_list = [self.train_op, self.loss, self.train_accuracy, self.global_step]
    self.val_op_list = [self.val_accuracy, self.val_accuracy_update, self.val_summaries_op]

    if attn:
        self.train_op_list += [self.train_model.attn, self.train_model.belief,
                               self.train_data.gt, self.train_answer]
        self.val_op_list += [self.val_model.attn, self.val_model.belief,
                             self.val_data.gt, self.val_answer]
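# Guess at mu.get_lr based solely on its call signature
# (decay_type, learning_rate, global_step, decay_step, decay_rate); the real
# helper may support other decay types and key names. Exponential decay is the
# usual TF1 choice, so this sketch dispatches on hypothetical keys.
def get_lr(decay_type, learning_rate, global_step, decay_step, decay_rate):
    if decay_type == 'exp':  # assumed key
        return tf.train.exponential_decay(learning_rate, global_step,
                                          decay_step, decay_rate, staircase=True)
    if decay_type == 'inv':  # assumed key
        return tf.train.inverse_time_decay(learning_rate, global_step,
                                           decay_step, decay_rate)
    return tf.constant(learning_rate)  # no decay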
def __init__(self):
    if reset:
        if os.path.exists(self._checkpoint_dir):
            os.system('rm -rf %s' % self._checkpoint_dir)
        if os.path.exists(self._log_dir):
            os.system('rm -rf %s' % self._log_dir)

    fu.make_dirs(os.path.join(self._checkpoint_dir, 'best'))
    fu.make_dirs(self._log_dir)

    self.train_data = Input(split='train', mode=args.mode)
    self.step_data = Input(split='train', mode=args.mode)
    self.val_data = Input(split='val', mode=args.mode)
    # self.test_data = TestInput()

    self.train_model = mod.Model(self.train_data, training=True)
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        self.val_model = mod.Model(self.val_data)
    # with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    #     self.test_model = mod.Model(self.test_data, training=False)

    for v in tf.trainable_variables():
        print(v)

    self.loss = tf.losses.sparse_softmax_cross_entropy(self.train_data.gt, self.train_model.output)
    if args.reg:
        self.loss += tf.losses.get_regularization_loss()

    self.train_accuracy, self.train_accuracy_update = tf.metrics.accuracy(
        self.train_data.gt, tf.argmax(self.train_model.output, axis=1), name='train_accuracy')
    self.train_accuracy_initializer = tf.variables_initializer(
        tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope='train_accuracy'))
    self.val_accuracy, self.val_accuracy_update = tf.metrics.accuracy(
        self.val_data.gt, tf.argmax(self.val_model.output, axis=1), name='val_accuracy')
    self.val_accuracy_initializer = tf.variables_initializer(
        tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope='val_accuracy'))

    self.global_step = tf.train.get_or_create_global_step()
    self.learning_rate = mu.get_lr(hp['decay_type'], hp['learning_rate'], self.global_step,
                                   hp['decay_epoch'] * len(self.train_data), hp['decay_rate'])
    self.optimizer = mu.get_opt(hp['opt'], self.learning_rate)

    grads_and_vars = self.optimizer.compute_gradients(self.loss)
    gradients, variables = list(zip(*grads_and_vars))

    self.update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(self.update_ops):
        self.train_op = tf.group(
            self.optimizer.apply_gradients(grads_and_vars, self.global_step),
            self.train_accuracy_update)

    # After the update step, re-evaluate the same batch with shared weights;
    # where the prediction is correct the gradient is reinforced, otherwise
    # the roll_back op applies an amplified negative gradient to undo it.
    with tf.control_dependencies([self.train_op]):
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            self.step_model = mod.Model(self.step_data)
    gamma = tf.equal(self.step_data.gt, tf.argmax(self.step_model.output, axis=1))
    neg_grads_and_vars = [(tf.where(tf.logical_and(gamma, tf.ones_like(g, dtype=tf.bool)),
                                    self.train_accuracy * g,
                                    -(1 + self.train_accuracy / 2) * g), v)
                          for g, v in grads_and_vars]
    self.roll_back = self.optimizer.apply_gradients(neg_grads_and_vars)

    self.saver = tf.train.Saver(tf.global_variables())
    self.best_saver = tf.train.Saver(tf.global_variables())

    # Summary
    train_gv_summaries = []
    for idx, grad in enumerate(gradients):
        if grad is not None:
            train_gv_summaries.append(tf.summary.histogram(grad.name, grad))
            train_gv_summaries.append(tf.summary.histogram(variables[idx].name, variables[idx]))

    train_summaries = [
        tf.summary.scalar('train_loss', self.loss),
        tf.summary.scalar('train_accuracy', self.train_accuracy),
        tf.summary.scalar('learning_rate', self.learning_rate)
    ]
    self.train_summaries_op = tf.summary.merge(train_summaries)
    self.train_gv_summaries_op = tf.summary.merge(train_gv_summaries + train_summaries)
    self.val_summaries_op = tf.summary.scalar('val_accuracy', self.val_accuracy)

    if args.checkpoint:
        self.checkpoint_file = args.checkpoint
    else:
        self.checkpoint_file = tf.train.latest_checkpoint(self._checkpoint_dir)
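# Toy illustration (not project code) of the roll-back reweighting above, with
# shapes simplified to 1-D for clarity: with train_accuracy = 0.6, a gradient g
# is scaled by +0.6 where the re-evaluated prediction was correct, and by
# -(1 + 0.6/2) = -1.3 where it was wrong, i.e. wrong steps are undone and
# slightly over-corrected.
import tensorflow as tf

g = tf.constant([1.0, 1.0])
gamma = tf.constant([True, False])  # correct / incorrect after the step
acc = tf.constant(0.6)
reweighted = tf.where(gamma, acc * g, -(1 + acc / 2) * g)
with tf.Session() as sess:
    print(sess.run(reweighted))     # [ 0.6 -1.3]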
def __init__(self):
    if reset:
        if os.path.exists(self._checkpoint_dir):
            os.system('rm -rf %s' % self._checkpoint_dir)
        if os.path.exists(self._log_dir):
            os.system('rm -rf %s' % self._log_dir)
        if os.path.exists(self._attn_dir):
            os.system('rm -rf %s' % self._attn_dir)

    fu.make_dirs(os.path.join(self._checkpoint_dir, 'best'))
    fu.make_dirs(self._log_dir)
    fu.make_dirs(self._attn_dir)

    self.train_data = Input(split='train', mode=args.mode)
    self.val_data = Input(split='val', mode=args.mode)
    # self.test_data = TestInput()

    self.train_model = mod.Model(self.train_data, beta=hp['reg'], training=True)
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        self.val_model = mod.Model(self.val_data)
    # with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    #     self.test_model = mod.Model(self.test_data, training=False)

    for v in tf.trainable_variables():
        print(v)

    # Attention scores gathered at the ground-truth answer index.
    self.train_attn = tf.squeeze(self.train_model.attn[self.train_data.gt[0]])
    self.val_attn = tf.squeeze(self.val_model.attn[self.val_data.gt[0]])

    self.main_loss = mu.get_loss(hp['loss'], self.train_data.gt, self.train_model.output)
    self.attn_loss = mu.get_loss('hinge', tf.to_float(self.train_data.spec), self.train_attn)
    self.regu_loss = tf.losses.get_regularization_loss()

    self.loss = 0
    if 'main' in target:
        self.loss += self.main_loss
    elif 'attn' in target:
        self.loss += self.attn_loss
    self.loss += self.regu_loss

    self.train_acc, self.train_acc_update, self.train_acc_init = \
        mu.get_acc(self.train_data.gt, tf.argmax(self.train_model.output, axis=1),
                   name='train_accuracy')
    self.train_attn_acc, self.train_attn_acc_update, self.train_attn_acc_init = \
        mu.get_acc(self.train_data.spec, tf.to_int32(self.train_attn > 0.5),
                   name='train_attention_accuracy')
    # self.train_q_attn_acc, self.train_q_attn_acc_update, self.train_q_attn_acc_init = \
    #     tf.metrics.accuracy(self.train_data.spec, self.train_model.output, name='train_q_attention_accuracy')
    #
    # self.train_a_attn_acc, self.train_a_attn_acc_update, self.train_a_attn_acc_init = \
    #     tf.metrics.accuracy(self.train_data.spec, self.train_model.output, name='train_a_attention_accuracy')

    self.val_acc, self.val_acc_update, self.val_acc_init = \
        mu.get_acc(self.val_data.gt, tf.argmax(self.val_model.output, axis=1),
                   name='val_accuracy')
    self.val_attn_acc, self.val_attn_acc_update, self.val_attn_acc_init = \
        mu.get_acc(self.val_data.spec, tf.to_int32(self.val_attn > 0.5),
                   name='val_attention_accuracy')
    # self.val_q_attn_acc, self.val_q_attn_acc_update, self.val_q_attn_acc_init = \
    #     tf.metrics.accuracy(self.train_data.spec, self.train_model.output, name='val_q_attention_accuracy')
    #
    # self.val_a_attn_acc, self.val_a_attn_acc_update, self.val_a_attn_acc_init = \
    #     tf.metrics.accuracy(self.train_data.spec, self.train_model.output, name='val_a_attention_accuracy')

    self.global_step = tf.train.get_or_create_global_step()
    decay_step = int(hp['decay_epoch'] * len(self.train_data))
    self.learning_rate = mu.get_lr(hp['decay_type'], hp['learning_rate'], self.global_step,
                                   decay_step, hp['decay_rate'])
    self.optimizer = mu.get_opt(hp['opt'], self.learning_rate, decay_step)

    grads_and_vars = self.optimizer.compute_gradients(self.loss)
    gradients, variables = list(zip(*grads_and_vars))

    self.update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(self.update_ops):
        self.train_op = tf.group(
            self.optimizer.apply_gradients(grads_and_vars, self.global_step),
            self.train_acc_update,
            self.train_attn_acc_update)
            # self.train_a_attn_acc_update, self.train_q_attn_acc_update)

    self.saver = tf.train.Saver(tf.global_variables())
    self.best_saver = tf.train.Saver(tf.global_variables())

    # Summary
    train_gv_summaries = []
    for idx, grad in enumerate(gradients):
        if grad is not None:
            train_gv_summaries.append(tf.summary.histogram('gradients/' + variables[idx].name, grad))
            train_gv_summaries.append(tf.summary.histogram(variables[idx].name, variables[idx]))

    train_summaries = [
        tf.summary.scalar('train_loss', self.loss),
        tf.summary.scalar('train_accuracy', self.train_acc),
        # tf.summary.scalar('train_a_attn_accuracy', self.train_a_attn_acc),
        # tf.summary.scalar('train_q_attn_accuracy', self.train_q_attn_acc),
        tf.summary.scalar('train_attn_accuracy', self.train_attn_acc),
        tf.summary.scalar('learning_rate', self.learning_rate)
    ]
    self.train_summaries_op = tf.summary.merge(train_summaries)
    self.train_gv_summaries_op = tf.summary.merge(train_gv_summaries + train_summaries)

    val_summaries = [
        tf.summary.scalar('val_accuracy', self.val_acc),
        tf.summary.scalar('val_attn_accuracy', self.val_attn_acc),
        # tf.summary.scalar('val_a_attn_accuracy', self.val_a_attn_acc),
        # tf.summary.scalar('val_q_attn_accuracy', self.val_q_attn_acc),
    ]
    self.val_summaries_op = tf.summary.merge(val_summaries)

    if args.checkpoint:
        self.checkpoint_file = args.checkpoint
    else:
        self.checkpoint_file = tf.train.latest_checkpoint(self._checkpoint_dir)

    self.train_init_op_list = [self.train_data.initializer, self.train_acc_init,
                               # self.train_q_attn_acc_init, self.train_a_attn_acc_init,
                               self.train_attn_acc_init]
    self.val_init_op_list = [self.val_data.initializer, self.val_acc_init,
                             # self.val_q_attn_acc_init, self.val_a_attn_acc_init,
                             self.val_attn_acc_init]
    self.train_op_list = [self.train_op, self.loss, self.attn_loss,
                          self.train_acc, self.train_attn_acc,
                          # self.val_q_attn_acc, self.val_a_attn_acc,
                          self.global_step, self.train_data.spec, self.train_attn]
    self.val_op_list = [self.val_acc, self.val_attn_acc,
                        # self.val_q_attn_acc, self.val_a_attn_acc,
                        tf.group(self.val_acc_update, self.val_attn_acc_update
                                 # self.val_q_attn_acc_update, self.val_a_attn_acc_update
                                 ),
                        self.val_summaries_op, self.val_data.spec, self.val_attn]
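# One plausible branch of mu.get_loss for the 'hinge' case used above, where
# binary spec labels supervise the attention scores directly. The helper's
# real dispatch table is not shown in this section; the 'sparse_softmax' key
# is likewise an assumption. Assumes `import tensorflow as tf` is in scope.
def get_loss(kind, labels, outputs):
    if kind == 'hinge':
        return tf.losses.hinge_loss(labels=labels, logits=outputs)
    if kind == 'sparse_softmax':  # assumed key
        return tf.losses.sparse_softmax_cross_entropy(labels, outputs)
    raise ValueError('unknown loss: %s' % kind)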