def _gen_ops(self):
    """Build the training/eval graph ops and bundle them into fetch lists.

    Constructs the main (Radam) optimizer, optional regularization terms,
    an optional pretraining op, and the forward passes over the train,
    valid, and test sets.

    Returns:
      dict with keys 'pretrain_op', 'train_op', 'valid_op', 'test_op'
      (lists of tensors/ops to fetch in a session) and 'optimizer'
      (the RadamOptimizer instance itself).
    """
    optimizer = optimizers.RadamOptimizer(self._config,
                                          global_step=self.global_step)
    train_output = self._model(self._trainset)
    # Each regularizer falls back to the model's constant zero tensor when
    # its coefficient is unset/0, so the sums below stay valid tensors.
    l2_loss = self.l2_reg * tf.add_n([
        tf.nn.l2_loss(matrix) for matrix in tf.get_collection('Weights')
    ]) if self.l2_reg else self.model.ZERO
    recur_loss = self.recur_reg * tf.add_n(
        tf.get_collection(
            'recur_losses')) if self.recur_reg else self.model.ZERO
    covar_loss = self.covar_reg * tf.add_n(
        tf.get_collection(
            'covar_losses')) if self.covar_reg else self.model.ZERO
    ortho_loss = self.ortho_reg * tf.add_n(
        tf.get_collection(
            'ortho_losses')) if self.ortho_reg else self.model.ZERO
    regularization_loss = recur_loss + covar_loss + ortho_loss
    # A second optimizer (no global_step) drives pretraining whenever any
    # structural regularizer or an explicit pretrain loss is present.
    if self.recur_reg or self.covar_reg or self.ortho_reg or 'pretrain_loss' in train_output:
        optimizer2 = optimizers.RadamOptimizer(self._config)
        pretrain_loss = train_output.get('pretrain_loss', self.model.ZERO)
        pretrain_op = optimizer2.minimize(pretrain_loss + regularization_loss)
    else:
        # No pretraining: zero placeholders keep the fetch lists uniform.
        pretrain_loss = self.model.ZERO
        pretrain_op = self.model.ZERO
    train_op = optimizer.minimize(train_output['loss'] + l2_loss +
                                  regularization_loss)
    # These have to happen after optimizer.minimize is called
    valid_output = self._model(self._validset, moving_params=optimizer)
    test_output = self._model(self._testset, moving_params=optimizer)
    ops = {}
    ops['pretrain_op'] = [
        pretrain_op, pretrain_loss, recur_loss, covar_loss, ortho_loss
    ]
    ops['train_op'] = [
        train_op, train_output['loss'] + l2_loss + regularization_loss,
        train_output['n_correct'], train_output['n_tokens']
    ]
    ops['valid_op'] = [
        valid_output['loss'], valid_output['n_correct'],
        valid_output['n_tokens'], valid_output['predictions']
    ]
    ops['test_op'] = [
        valid_output['probabilities'], test_output['probabilities']
    ]
    ops['optimizer'] = optimizer
    return ops
def _gen_ops(self):
    """Assemble the session fetch lists for training, validation, and test.

    Returns:
      dict mapping 'train_op', 'valid_op', 'test_op' to lists of
      tensors/ops, and 'optimizer' to the RadamOptimizer instance.
    """
    radam = optimizers.RadamOptimizer(self._config,
                                      global_step=self.global_step)
    train_out = self._model(self._trainset)
    step_op = radam.minimize(train_out['loss'])

    # These have to happen after optimizer.minimize is called
    valid_out = self._model(self._validset, moving_params=radam)
    test_out = self._model(self._testset, moving_params=radam)

    return {
        'train_op': [
            step_op,
            train_out['loss'],
            train_out['n_correct'],
            train_out['n_tokens'],
        ],
        'valid_op': [
            valid_out['loss'],
            valid_out['n_correct'],
            valid_out['n_tokens'],
            valid_out['predictions'],
        ],
        'test_op': [
            [valid_out['predictions_test'], valid_out['probabilities_test']],
            [test_out['predictions_test'], test_out['probabilities_test']],
        ],
        'optimizer': radam,
    }
def _gen_ops(self):
    """Build session fetch lists; dev/test groups are added only when the
    corresponding dataset is present.

    Returns:
      dict with 'train_op' (always), 'valid_op'/'test_op' (when the
      datasets exist), and 'optimizer'.
    """
    fetches = {}
    radam = optimizers.RadamOptimizer(self._config,
                                      global_step=self.global_step)
    train_out = self._model(self._trainset)
    step_op = radam.minimize(train_out['loss'])
    fetches['train_op'] = [
        step_op,
        train_out['loss'],
        train_out['n_correct'],
        train_out['n_tokens'],
    ]
    # These have to happen after optimizer.minimize is called
    if self._validset:
        valid_out = self._model(self._validset, moving_params=radam)
        fetches['valid_op'] = [
            valid_out['loss'],
            valid_out['n_correct'],
            valid_out['n_tokens'],
            valid_out['predictions'],
        ]
    else:
        print('No dev set. Skipping...')
    if self._testset:
        test_out = self._model(self._testset, moving_params=radam)
        # Without a dev set there are no dev probabilities to pair up.
        if self._validset:
            dev_probs = valid_out['probabilities']
        else:
            dev_probs = None
        fetches['test_op'] = [dev_probs, test_out['probabilities']]
    else:
        print('No test set. Skipping...')
    fetches['optimizer'] = radam
    return fetches
def _gen_ops(self):
    """Build train/eval graphs for the baseline model and its GEMB variant,
    and return the fetch lists for each phase.

    Two optimizers are created: `optimizer` (Radam) for the main model
    loss and `optimizer2` (RadamOptimizerMod) for the GEMB loss, which
    updates only the 'gemb/gemb_fc' variables. All built graphs are also
    stored on `self` so callers can fetch extra tensors from them.

    Returns:
      dict with 'train_op', 'train_gemb_op', 'valid_op', 'valid_gemb_op'
      fetch lists and the main 'optimizer'.
    """
    optimizer = optimizers.RadamOptimizer(self._config,
                                          global_step=self.global_step)
    optimizer2 = optimizers.RadamOptimizerMod(self._config,
                                              global_step=self.global_step)
    train_graph = self._model.build_graph(self._trainset)
    # The GEMB training graph reads the main optimizer's moving params.
    train_gemb_graph = self._model.build_graph(self._train_gemb_set,
                                               moving_params=optimizer)
    self._model.add_gemb_loss_graph(train_gemb_graph)
    train_op = optimizer.minimize(train_graph['loss'])
    # Restrict GEMB updates to the gemb fully-connected variables only.
    train_gemb_op = optimizer2.minimize(
        train_gemb_graph['gemb_loss'],
        var_list=[var for var in tf.global_variables()
                  if 'gemb/gemb_fc' in var.op.name])
    # These have to happen after optimizer.minimize is called
    # for baselines
    valid_graph = self._model.build_graph(self._validset, moving_params=optimizer)
    test_graph = self._model.build_graph(self._testset, moving_params=optimizer)
    # for gembs
    valid_get_gemb_graph = self._model.build_graph(self._valid_gemb_set, moving_params=optimizer)
    self._model.add_get_gemb_graph(valid_get_gemb_graph)
    test_get_gemb_graph = self._model.build_graph(self._test_gemb_set, moving_params=optimizer)
    self._model.add_get_gemb_graph(test_get_gemb_graph)
    valid_gemb_graph = self._model.build_test_gemb_graph(self._valid_gemb_set, moving_params=optimizer)
    test_gemb_graph = self._model.build_test_gemb_graph(self._test_gemb_set, moving_params=optimizer)
    # Expose every graph on the instance for external consumers.
    self.train_graph = train_graph
    self.valid_graph = valid_graph
    self.test_graph = test_graph
    self.train_gemb_graph = train_gemb_graph
    self.valid_get_gemb_graph = valid_get_gemb_graph
    self.valid_gemb_graph = valid_gemb_graph
    self.test_get_gemb_graph = test_get_gemb_graph
    self.test_gemb_graph = test_gemb_graph
    ops = {}
    ops['train_op'] = [train_op, train_graph['loss'],
                       train_graph['n_correct'], train_graph['n_tokens']]
    ops['train_gemb_op'] = [train_gemb_op, train_gemb_graph['gemb_loss']]
    ops['valid_op'] = [valid_graph['loss'], valid_graph['n_correct'],
                       valid_graph['n_tokens'], valid_graph['predictions']]
    # NOTE(review): test_graph / test_gemb_graph are built but no
    # 'test_op' is returned — presumably they are consumed through the
    # instance attributes above; confirm against callers.
    ops['valid_gemb_op'] = [valid_gemb_graph['loss'],
                            valid_gemb_graph['n_correct'],
                            valid_gemb_graph['n_tokens'],
                            valid_gemb_graph['predictions']]
    ops['optimizer'] = optimizer
    return ops
def _gen_ops(self):
    """Build fetch lists for the joint parser / SRL multitask model.

    A single Radam optimizer minimizes the combined training loss; three
    training fetch lists of increasing detail are returned alongside the
    dev and test fetches. Unlike sibling variants, the optimizer itself
    is deliberately not included in the returned dict (see the retained
    commented-out line at the bottom).

    Returns:
      dict with 'train_op', 'train_op_svd', 'train_op_srl', 'valid_op',
      and 'test_op' fetch lists.
    """
    optimizer = optimizers.RadamOptimizer(self._config,
                                          global_step=self._global_step)
    train_output = self._model(self._trainset)
    lr = optimizer.learning_rate
    train_op = optimizer.minimize(train_output['loss'])
    # These have to happen after optimizer.minimize is called
    valid_output = self._model(self._validset, moving_params=optimizer)
    test_output = self._model(self._testset, moving_params=optimizer)
    ops = {}
    # Basic fetches: the update op plus loss/accuracy counters.
    ops['train_op'] = [train_op] + [train_output['loss'],
                                    train_output['n_correct'],
                                    train_output['n_tokens']]
    # Adds the parse-specific loss components to the basic fetches.
    ops['train_op_svd'] = [train_op] + [train_output['loss'],
                                        train_output['n_correct'],
                                        train_output['n_tokens'],
                                        train_output['roots_loss'],
                                        train_output['2cycle_loss'],
                                        train_output['svd_loss'],
                                        train_output['log_loss'],
                                        train_output['rel_loss']]
    # Full multitask fetches: parse + SRL + trigger + POS losses/counters,
    # the per-task multitask losses, and the current learning rate.
    ops['train_op_srl'] = [train_op] + [train_output['loss'],
                                        train_output['n_correct'],
                                        train_output['n_tokens'],
                                        train_output['roots_loss'],
                                        train_output['2cycle_loss'],
                                        train_output['svd_loss'],
                                        train_output['log_loss'],
                                        train_output['rel_loss'],
                                        train_output['srl_loss'],
                                        train_output['srl_correct'],
                                        train_output['srl_count'],
                                        train_output['trigger_loss'],
                                        train_output['trigger_count'],
                                        train_output['trigger_correct'],
                                        train_output['pos_loss'],
                                        train_output['pos_correct'],
                                        train_output['multitask_losses'],
                                        lr]
    ops['valid_op'] = [valid_output['loss'],
                       valid_output['n_correct'],
                       valid_output['n_tokens'],
                       valid_output['predictions']]
    # Test fetches: 15 dev tensors followed by the same 15 test tensors.
    ops['test_op'] = [valid_output['probabilities'],
                      valid_output['n_cycles'],
                      valid_output['len_2_cycles'],
                      valid_output['srl_probs'],
                      valid_output['srl_preds'],
                      valid_output['srl_logits'],
                      valid_output['srl_correct'],
                      valid_output['srl_count'],
                      valid_output['srl_trigger'],
                      valid_output['srl_trigger_targets'],
                      valid_output['transition_params'],
                      valid_output['attn_weights'],
                      valid_output['attn_correct'],
                      valid_output['pos_correct'],
                      valid_output['pos_preds'],
                      test_output['probabilities'],
                      test_output['n_cycles'],
                      test_output['len_2_cycles'],
                      test_output['srl_probs'],
                      test_output['srl_preds'],
                      test_output['srl_logits'],
                      test_output['srl_correct'],
                      test_output['srl_count'],
                      test_output['srl_trigger'],
                      test_output['srl_trigger_targets'],
                      test_output['transition_params'],
                      test_output['attn_weights'],
                      test_output['attn_correct'],
                      test_output['pos_correct'],
                      test_output['pos_preds'],
                      ]
    # ops['optimizer'] = optimizer
    return ops
def _gen_ops(self):
    """Build optimizer ops for single-task and (optionally) multi-task
    training, plus valid/test fetch lists.

    When `self.multi` is set, a second Radam optimizer trains the
    multi-task loss over all trainable variables except those with
    '_base' in their name, while the main optimizer excludes '_multi'
    variables; the differences between the two variable sets are logged.

    Returns:
      dict with 'pretrain_op', 'train_op', optional 'train_op_multi',
      'valid_op', 'test_op' fetch lists and the main 'optimizer'.
    """
    optimizer = optimizers.RadamOptimizer(self._config,
                                          global_step=self.global_step)
    train_output = self._model(self._trainset)
    # Each regularizer falls back to the model's constant zero tensor when
    # its coefficient is unset/0, so the sums below stay valid tensors.
    l2_loss = self.l2_reg * tf.add_n([
        tf.nn.l2_loss(matrix) for matrix in tf.get_collection('Weights')
    ]) if self.l2_reg else self.model.ZERO
    recur_loss = self.recur_reg * tf.add_n(
        tf.get_collection(
            'recur_losses')) if self.recur_reg else self.model.ZERO
    covar_loss = self.covar_reg * tf.add_n(
        tf.get_collection(
            'covar_losses')) if self.covar_reg else self.model.ZERO
    ortho_loss = self.ortho_reg * tf.add_n(
        tf.get_collection(
            'ortho_losses')) if self.ortho_reg else self.model.ZERO
    regularization_loss = recur_loss + covar_loss + ortho_loss
    # A second optimizer (no global_step) drives pretraining whenever any
    # structural regularizer or an explicit pretrain loss is present.
    if self.recur_reg or self.covar_reg or self.ortho_reg or 'pretrain_loss' in train_output:
        optimizer2 = optimizers.RadamOptimizer(self._config)
        pretrain_loss = train_output.get('pretrain_loss', self.model.ZERO)
        pretrain_op = optimizer2.minimize(
            pretrain_loss + regularization_loss,
            var_list=tf.trainable_variables())
    else:
        # No pretraining: zero placeholders keep the fetch lists uniform.
        pretrain_loss = self.model.ZERO
        pretrain_op = self.model.ZERO
    if self.multi:
        train_output_multi = self._model(self._trainset_multi, multi=True)
        optimizer_multi = optimizers.RadamOptimizer(
            self._config, global_step=self.global_step)
        # BUGFIX: use list comprehensions instead of filter(). On
        # Python 3, filter() returns a single-use iterator, so passing
        # it to minimize(var_list=...) exhausted it and the diff-logging
        # loops below saw empty sequences; `x in update_vars` on an
        # iterator also silently consumed it. Lists behave identically
        # under Python 2 and correctly under Python 3.
        update_vars_multi = [
            x for x in tf.trainable_variables() if u'_base' not in x.name
        ]
        train_op_multi = optimizer_multi.minimize(
            train_output_multi['loss_multi'] + l2_loss + regularization_loss,
            var_list=update_vars_multi)
        update_vars = [
            x for x in tf.trainable_variables() if u'_multi' not in x.name
        ]
        L.info(
            'train_op_multi is updating the following variables different from train_op:'
        )
        for v in [x for x in update_vars_multi if not x in update_vars]:
            print(v.name)
        L.info(
            'train_op is updating the following variables different from train_op_multi:'
        )
        for v in [x for x in update_vars if not x in update_vars_multi]:
            print(v.name)
    else:
        update_vars = tf.trainable_variables()
    train_op = optimizer.minimize(train_output['loss'] + l2_loss +
                                  regularization_loss,
                                  var_list=update_vars)
    # These have to happen after optimizer.minimize is called
    valid_output = self._model(self._validset, moving_params=optimizer)
    test_output = self._model(self._testset, moving_params=optimizer)
    ops = {}
    ops['pretrain_op'] = [
        pretrain_op, pretrain_loss, recur_loss, covar_loss, ortho_loss
    ]
    ops['train_op'] = [
        train_op, train_output['loss'] + l2_loss + regularization_loss,
        train_output['n_correct'], train_output['n_tokens']
    ]
    if self.multi:
        ops['train_op_multi'] = [
            train_op_multi,
            train_output_multi['loss_multi'] + l2_loss + regularization_loss,
            train_output_multi['n_correct_multi'],
            train_output_multi['n_tokens_multi']
        ]
    ops['valid_op'] = [
        valid_output['loss'], valid_output['n_correct'],
        valid_output['n_tokens'], valid_output['predictions']
    ]
    ops['test_op'] = [
        valid_output['probabilities'], test_output['probabilities']
    ]
    ops['optimizer'] = optimizer
    return ops
def _gen_ops(self):
    """Build attention-model training ops plus valid/test fetch lists.

    Several training ops are constructed: `atrainop` (plain Adam over the
    filtered variable list; this is the op actually fetched), and
    `mdtrainop`/`train_op` (Radam), which are built but not fetched here.

    Returns:
      dict with 'pretrain_op', 'train_op', 'valid_op', 'test_op' fetch
      lists and the Radam 'optimizer'.
    """
    optimizer = optimizers.RadamOptimizer(self._config,
                                          global_step=self.global_step)
    train_output = self._model(self._trainset)
    # Each regularizer falls back to the model's constant zero tensor when
    # its coefficient is unset/0, so the sums below stay valid tensors.
    l2_loss = self.l2_reg * tf.add_n([
        tf.nn.l2_loss(matrix) for matrix in tf.get_collection('Weights')
    ]) if self.l2_reg else self.model.ZERO
    recur_loss = self.recur_reg * tf.add_n(
        tf.get_collection(
            'recur_losses')) if self.recur_reg else self.model.ZERO
    covar_loss = self.covar_reg * tf.add_n(
        tf.get_collection(
            'covar_losses')) if self.covar_reg else self.model.ZERO
    ortho_loss = self.ortho_reg * tf.add_n(
        tf.get_collection(
            'ortho_losses')) if self.ortho_reg else self.model.ZERO
    regularization_loss = recur_loss + covar_loss + ortho_loss
    # A second optimizer (no global_step) drives pretraining whenever any
    # structural regularizer or an explicit pretrain loss is present.
    if self.recur_reg or self.covar_reg or self.ortho_reg or 'pretrain_loss' in train_output:
        optimizer2 = optimizers.RadamOptimizer(self._config)
        pretrain_loss = train_output.get('pretrain_loss', self.model.ZERO)
        pretrain_op = optimizer2.minimize(pretrain_loss + regularization_loss)
    else:
        pretrain_loss = self.model.ZERO
        pretrain_op = self.model.ZERO
    # BUGFIX: list comprehension instead of filter(). On Python 3 a
    # filter object is a single-use iterator, so the first minimize()
    # call below would exhaust it and every later use (the second
    # minimize and the atops dict) would see an empty sequence.
    atscope_p = [
        x for x in tf.trainable_variables()
        if u'Trainable_base' not in x.name and u'RNN0' not in x.name
        and u'RNN1' not in x.name and u'RNN2' not in x.name
        and u'RNN3' not in x.name and u'MLP' not in x.name
    ]
    atscope = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                scope='Attention_based')
    atoptimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    atrainop = atoptimizer.minimize(train_output['loss'], var_list=atscope_p)
    # BUGFIX: keyword is var_list, not varlist (sibling variants and the
    # TF Optimizer API use var_list; varlist raises TypeError).
    mdtrainop = optimizer.minimize(train_output['loss'], var_list=atscope_p)
    names_to_vars_trainable = {
        v.op.name: v for v in tf.trainable_variables()
    }
    print(
        '####################names_to_vars_trainable#############################################'
    )
    print(names_to_vars_trainable.keys())
    print(
        '#########################filter scope varlist#############################'
    )
    atops = {v.op.name: v for v in atscope_p}
    print(atops.keys())
    # BUGFIX: var_list (was varlist), as above.
    train_op = optimizer.minimize(train_output['loss'] + l2_loss +
                                  regularization_loss,
                                  var_list=atscope)
    # These have to happen after optimizer.minimize is called
    valid_output = self._model(self._validset, moving_params=optimizer)
    test_output = self._model(self._testset, moving_params=optimizer)
    ops = {}
    ops['pretrain_op'] = [
        pretrain_op, pretrain_loss, recur_loss, covar_loss, ortho_loss
    ]
    # Note: the fetched training op is atrainop (Adam), not train_op.
    ops['train_op'] = [
        atrainop, train_output['loss'] + l2_loss + regularization_loss,
        train_output['accuracy'], train_output['n_correct'],
        train_output['batch_size']
    ]
    ops['valid_op'] = [
        valid_output['loss'], valid_output['n_correct'],
        valid_output['batch_size'], valid_output['predictions']
    ]
    ops['test_op'] = [[
        valid_output['probabilities'], valid_output['accuracy']
    ], [test_output['probabilities'], test_output['accuracy']]]
    ops['optimizer'] = optimizer
    return ops