def train(self, text_train, label_train, text_test, label_test, model_dir):
    """Train the model with mini-batch SGD (Adam) and report test accuracy once per epoch.

    Args:
        text_train: training inputs fed to ``self._input``.
        label_train: training labels (flattened to shape [-1, 1] per batch).
        text_test: held-out inputs used for the per-epoch evaluation.
        label_test: held-out labels; smoothed once up front.
        model_dir: currently unused in this body — NOTE(review): confirm whether
            checkpointing to ``model_dir`` was intended and dropped.
    """
    label_test = np.reshape(label_test, [-1, 1])
    label_test = labels_smooth(label_test, self._config.class_nums, self._config.label_smooth_eps)
    with self._graph.as_default():
        # One-shot iterator over the whole training run: batch, then repeat for
        # ``epoch`` passes; exhaustion raises OutOfRangeError, which ends training.
        dataset = tf.data.Dataset.from_tensor_slices((text_train, label_train))
        batch_dataset = dataset.batch(batch_size=self._config.batch_size)
        repeat_dataset = batch_dataset.repeat(self._config.epoch)
        data_iterator = repeat_dataset.make_one_shot_iterator()
        next_batch_text, next_batch_label = data_iterator.get_next()
        global_step = tf.Variable(0, trainable=False)
        data_len = len(label_train)
        # Number of full batches per epoch (remainder batch is still trained on,
        # but the epoch boundary below is defined by full batches only).
        batch_nums = data_len // self._config.batch_size
        # Decay the learning rate once per epoch's worth of steps.
        learning_rate = tf.train.exponential_decay(self._config.init_learning_rate,
                                                   global_step=global_step,
                                                   decay_steps=data_len // self._config.batch_size,
                                                   decay_rate=self._config.learning_rate_decay)
        train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(
            self.loss, global_step=global_step, name="adam-textcnn")
        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
        with tf.Session(config=gpu_config()) as sess:
            sess.run(init_op)
            # BUGFIX: counter previously started at 1 and was incremented before
            # the first batch, so ``counter % batch_nums == 0`` fired one batch
            # early and ``counter // batch_nums`` mislabeled the epoch. Starting
            # at 0 makes counter equal the number of batches processed.
            counter = 0
            while True:
                try:
                    counter += 1
                    self._mode = "train"
                    x, y = sess.run([next_batch_text, next_batch_label])
                    y = np.reshape(y, [-1, 1]).astype(np.int32)
                    y = labels_smooth(y, self._config.class_nums, self._config.label_smooth_eps)
                    loss, _ = sess.run([self.loss, train_op],
                                       feed_dict={self._input: x, self._target: y})
                    if counter % batch_nums == 0:
                        # End of an epoch: report training loss and test accuracy.
                        print("Epoch %d loss: %lf" % ((counter // batch_nums), loss))
                        self._mode = "test"
                        accurcy = sess.run(self.accurcy,
                                           feed_dict={self._input: text_test, self._target: label_test})
                        print("Test accurcy:", accurcy)
                except tf.errors.OutOfRangeError:
                    # Dataset exhausted after the configured number of epochs.
                    break
def __init__(self, model, batcher, vocab, ckpt_id=None, fw_sess=None, bw_model=None, bw_sess=None, bidi_ckpt_path=None):
    """Set up a decoder around a (forward) model, optionally paired with a backward model.

    Args:
        model: forward model whose checkpoint names the decode directory.
        batcher: data batcher used during decoding.
        vocab: vocabulary object.
        ckpt_id: currently unused — NOTE(review): confirm whether it should select a checkpoint.
        fw_sess: existing session for the forward model; a new one is created if None.
        bw_model: optional backward model (bidirectional/MMI decoding).
        bw_sess: session for the backward model, stored as ``self.sess2``.
        bidi_ckpt_path: checkpoint path used to name the decode dir when ``bw_model`` is given.
    """
    self.model = model
    # BUGFIX: removed dead ``self.bw_model = model`` that was immediately
    # overwritten by ``self.bw_model = bw_model`` below.
    self.batcher = batcher
    self.vocab = vocab
    self.sess = tf.Session(config=utils.gpu_config()) if fw_sess is None else fw_sess
    self.sess2 = bw_sess
    self.bw_model = bw_model
    if bw_model is None:
        # Single-model case: restore the forward model's latest checkpoint here.
        ckpt_path = utils.load_ckpt(self.model.hps, self.model.saver, self.sess)
        print('Checkpoint path name: {}'.format(ckpt_path))
        ckpt_name = 'ckpt-' + ckpt_path.split('-')[-1]
    else:
        # Bidirectional case: sessions were restored by the caller; only derive the name.
        ckpt_name = 'ckpt-' + bidi_ckpt_path.split('-')[-1]
    self.decode_dir = os.path.join(model.hps.model_path, make_decode_dir_name(ckpt_name, model.hps))
    # exist_ok avoids the check-then-create race of the old exists()/makedirs pair.
    os.makedirs(self.decode_dir, exist_ok=True)
args.out_path + '/y/' + args.gain + '/' + args.test_fnames[j] + '.wav', args.fs, y_out) print("Inference (%s): %3.2f%%. " % (args.out_type, 100 * ((j + 1) / len(args.test_x_len))), end="\r") print('\nInference complete.') if __name__ == '__main__': ## GET COMMAND LINE ARGUMENTS args = utils.args() ## ARGUMENTS args.ver = '3a' args.blocks = ['C3'] + ['B5'] * 40 + ['O1'] args.epoch = 175 # for inference. ## TRAINING AND TESTING SET ARGUMENTS args = deepxi_args(args) ## MAKE DEEP XI NNET net = deepxi_net(args) ## GPU CONFIGURATION config = utils.gpu_config(args.gpu) with tf.Session(config=config) as sess: if args.train: train(sess, net, args) if args.infer: infer(sess, net, args)
else: ## DEEP XI FOR IBM ESTIMATION deepxi_args = utils.args() deepxi_args.ver = '3a' deepxi_args.blocks = ['C3'] + ['B5'] * 40 + ['O1'] deepxi_args.epoch = 175 deepxi_args.stats_path = './DeepXi/stats' deepxi_args.model_path = './DeepXi/model' deepxi_args.train = False deepxi_args = deepxi.deepxi_args(deepxi_args) deepxi_args.infer = True deepxi_graph = tf.Graph() with deepxi_graph.as_default(): deepxi_net = deepxi.deepxi_net(deepxi_args) config = utils.gpu_config(deepxi_args.gpu) deepxi_sess = tf.Session(config=config, graph=deepxi_graph) deepxi_net.saver.restore( deepxi_sess, deepxi_args.model_path + '/epoch-' + str(deepxi_args.epoch)) # load model for epoch. ## MARGINALISATION if spn_args.mft == 'marg': test_noisy_speech(deepxi_sess, deepxi_net, spn_args) ## BOUNDED MARGINALISATION if spn_args.mft == 'bmarg': test_noisy_speech(deepxi_sess, deepxi_net, spn_args) # CLOSE TF GRAPH deepxi_sess.close()
def main():
    """Entry point: normalize CLI args, build the requested model, then train/decode/eval."""
    utils.print_config(args)
    if 'train' not in args.mode:
        # No dropout outside training.
        args.keep_rate = 1.0
    # CLI flags arrive as strings; coerce to real booleans.
    args.use_pretrain = True if args.use_pretrain == 'True' else False
    args.use_aux_task = True if args.use_aux_task == 'True' else False
    if args.mode == 'lm_train':
        # Language-model pretraining overrides model choice and data path.
        args.model = 'lm'
        args.data_path = "./data/wikitext/wikitext-103/processed_wiki_train.bin"
        args.use_pretrain = False
    args.model_path = os.path.join(args.model_path, args.exp_name).format(
        args.model)  #model_path default="data/log/{}
    if not os.path.exists(args.model_path):
        if 'train' not in args.mode:
            # Decoding/eval requires an existing experiment directory.
            print(args.model_path)
            raise ValueError
        os.makedirs(args.model_path)
    # Persist the effective configuration alongside the model.
    with open(os.path.join(args.model_path, 'config.json'), 'w', encoding='utf8') as f:
        json.dump(vars(args), f)
    print("Default models path: {}".format(args.model_path))
    print('code start/ {} mode / {} models'.format(args.mode, args.model))
    utils.assign_specific_gpu(args.gpu_nums)
    vocab = utils.Vocab()
    # Pretrained weights are only loaded for plain training with pretraining enabled.
    vardicts = utils.get_pretrain_weights(
        args.true_pretrain_ckpt_path
    ) if args.use_pretrain and args.mode == 'train' else None
    if args.mode == 'decode':
        if args.model == 'mmi_bidi':
            args.beam_size = args.mmi_bsize
        # Beam search batches one beam at a time.
        args.batch_size = args.beam_size
    modelhps = deepcopy(args)
    if modelhps.mode == 'decode':
        # Decode emits one token per step.
        modelhps.max_dec_len = 1
    if args.model == 'vanilla':
        model = BaseModel(vocab, modelhps)
    elif args.model == 'mmi_bidi':
        if args.mode == 'decode':
            # MMI decoding needs two graphs/sessions: a backward model (scores
            # sequences right-to-left) and a forward 'vanilla' model.
            bw_graph = tf.Graph()
            with bw_graph.as_default():
                bw_model = BaseModel(vocab, args)
            bw_sess = tf.Session(graph=bw_graph, config=utils.gpu_config())
            with bw_sess.as_default():
                with bw_graph.as_default():
                    bidi_ckpt_path = utils.load_ckpt(bw_model.hps, bw_model.saver, bw_sess)
            fw_graph = tf.Graph()
            with fw_graph.as_default():
                # The forward model reuses the vanilla checkpoint directory.
                modelhps.model_path = modelhps.model_path.replace(
                    'mmi_bidi', 'vanilla')
                modelhps.model = 'vanilla'
                fw_model = BaseModel(vocab, modelhps)
            fw_sess = tf.Session(graph=fw_graph)
            with fw_sess.as_default():
                with fw_graph.as_default():
                    ckpt_path = utils.load_ckpt(fw_model.hps, fw_model.saver, fw_sess)
        else:
            # mmi_bidi in training mode behaves like a single BaseModel.
            model = BaseModel(vocab, modelhps)
    elif args.model == 'lm':
        model = LMModel(vocab, modelhps)
    elif args.model == 'embmin':
        model = DiverEmbMin(vocab, modelhps)
    else:
        raise ValueError
    print('models load end')
    if args.mode in ['train', 'lm_train']:
        train(model, vocab, vardicts)
    elif args.mode == 'decode':
        import time
        if args.model == 'mmi_bidi':
            # Test-set batcher; the bidi decoder drives both sessions.
            batcher = Batcher(
                vocab, bw_model.hps.data_path.replace('train_', 'test_'), args)
            decoder = BeamsearchDecoder(fw_model,
                                        batcher,
                                        vocab,
                                        fw_sess=fw_sess,
                                        bw_model=bw_model,
                                        bw_sess=bw_sess,
                                        bidi_ckpt_path=bidi_ckpt_path)
        else:
            batcher = Batcher(vocab, model.hps.data_path.replace('train_', 'test_'), args)
            decoder = BeamsearchDecoder(model, batcher, vocab)
        decoder.decode()
    elif args.mode == 'eval':
        # Evaluation mode is not implemented here.
        pass
def train(model, vocab, pretrain_vardicts=None):
    """Meta-train ``model`` (Reptile/MAML-style): accumulate per-task gradients,
    apply them once per meta-batch, and periodically validate with fine-tuning.

    Args:
        model: model exposing ``hps``, ``get_loss``, ``run_step``, ``try_run``, ``saver``.
        vocab: vocabulary (unused directly here; data loaders receive it).
        pretrain_vardicts: optional pretrained weights applied when ``hps.use_pretrain``.
    """
    train_data_loader = Batcher_(vocab, model.hps.data_path, args)
    valid_data_loader = Batcher_(vocab, model.hps.data_path.replace('train_', 'dev_'), args)
    all_id, claim_id, claim_pers = train_data_loader.get_claim_id()
    all_val_id, claim_val_id, claim_val_pers = valid_data_loader.get_claim_id()
    train_logdir, dev_logdir = os.path.join(args.model_path, 'logdir/train'), os.path.join(
        args.model_path, 'logdir/dev')
    train_savedir = os.path.join(args.model_path, 'train/')
    print("[*] Train save directory is: {}".format(train_savedir))
    if not os.path.exists(train_logdir):
        os.makedirs(train_logdir)
    if not os.path.exists(dev_logdir):
        os.makedirs(dev_logdir)
    if not os.path.exists(train_savedir):
        os.makedirs(train_savedir)
    # print(all_id)
    los = model.get_loss()
    optim = tf.train.MomentumOptimizer(model.hps.meta_learning_rate, 0.9)
    grads_vars = optim.compute_gradients(
        los, tf.trainable_variables(),
        aggregation_method=tf.AggregationMethod.EXPERIMENTAL_TREE)
    grads_vars = deal_gradient(grads_vars)
    # Gradient accumulators, one per variable. NOTE(review): the first
    # (grad, var) pair is deliberately excluded everywhere below ([1:]) —
    # presumably an embedding kept frozen; confirm against deal_gradient.
    grads_cache = [
        tf.Variable(np.zeros(t[0].shape.as_list(), np.float32), trainable=False)
        for t in grads_vars[1:]
    ]
    clear_grads_cache_op = tf.group(
        [gc.assign(tf.zeros_like(gc)) for gc in grads_cache])
    accumulate_grad_op = tf.group(
        [gc.assign_add(gv[0]) for gc, gv in zip(grads_cache, grads_vars[1:])])
    new_grads_vars = [(g, gv[1]) for g, gv in zip(grads_cache, grads_vars[1:])]
    apply_grad_op = optim.apply_gradients(new_grads_vars)
    print("ready done!")
    with tf.Session(config=utils.gpu_config()) as sess:
        if model.hps.use_pretrain:
            assign_ops, uninitialized_varlist = utils.assign_pretrain_weights(
                pretrain_vardicts)
            sess.run(assign_ops)
            sess.run(tf.initialize_variables(uninitialized_varlist))
        else:
            sess.run(tf.global_variables_initializer())
        # Snapshot the initial weights so each task can be reset to them.
        saver = tf.train.Saver()
        model.saver.save(sess, './save/model')
        with tf.device('/cpu:0'):
            saver_ = tf.train.import_meta_graph('./save/model.meta')
        # BUGFIX: early-stopping state was re-initialized inside the
        # meta-iteration loop, so ``stop_count`` could never exceed ``patience``
        # and early stopping never fired; ``best_loss`` also reset each epoch.
        # Hoisted here so it persists across meta-iterations.
        best_loss = 30
        patience = 5
        stop_count = 0
        for meta_iteration in range(model.hps.max_epoch):
            train_loss_before = []
            train_loss_meta = []
            epoch_val_loss = 0
            # NOTE(review): ``epoch_batch_size`` is not defined in this function;
            # it must come from module scope — confirm.
            for epoch_bs in range(epoch_batch_size):
                batch_loss = 0
                val_all_loss = tf.zeros((), dtype=tf.float32)
                val_all_dia = []
                batch_grad_list = []
                sess.run(clear_grads_cache_op)
                for b_size in range(model.hps.meta_batch_size):
                    # print(b_size)
                    with tf.device('/cpu:0'):
                        # Sample one task: a random subset of claim ids.
                        shuffle(all_id)
                        cid_list = all_id[:model.hps.batch_size]
                        train_iter, val_iter = train_data_loader.get_data_loader(
                            cid_list, claim_id, claim_pers, model.hps.batch_size)
                    # Accumulate this task's gradient into the cache.
                    model.try_run(val_iter, sess, accumulate_grad_op)
                    # Loss before task adaptation.
                    res = model.run_step(val_iter, sess, is_train=False,
                                         freeze_layer=model.hps.use_pretrain)
                    v_loss, summaries, step = res['loss'], res[
                        'summaries'], res['global_step']
                    train_loss_before.append(v_loss)
                    #update
                    res_val, val_batch_loss = do_learning_fix_step(
                        model, train_iter, val_iter, sess)
                    print("do learning is done")
                    val_all_loss = tf.add(val_all_loss, val_batch_loss)
                    val_loss, summaries_val, step_val = res_val[
                        'loss'], res_val['summaries'], res_val['global_step']
                    print("val_loss:", val_loss)
                    train_loss_meta.append(val_loss)
                    batch_loss += val_loss
                    #reset: restore pre-adaptation weights before the next task.
                    saver_.restore(sess, tf.train.latest_checkpoint('./save'))
                    print("reset")
                print("one batch is done")
                # Meta-update from the accumulated per-task gradients.
                sess.run(apply_grad_op)
                the_name_model = './save/' + 'model' + str(
                    meta_iteration) + str(epoch_bs)
                model.saver.save(sess, the_name_model, write_meta_graph=False)
            print("epoch: {}, before loss:{} ".format(
                meta_iteration, np.mean(train_loss_before)))
            print("epoch: {}, after loss:{} ".format(meta_iteration,
                                                     np.mean(train_loss_meta)))
            if meta_iteration % 2 == 0:
                # Validation every other meta-iteration: fine-tune on held-out
                # tasks, then restore, tracking pre/post-tuning losses.
                num_claim_val = len(all_val_id)
                val_loss_before = []
                val_loss_meta = []
                shuffle(all_val_id)
                for i in range(0, 80, model.hps.batch_size):
                    with tf.device('/cpu:0'):
                        val_cid_list = all_val_id[i:i + model.hps.batch_size]
                        valid_train_iter, valid_val_iter = valid_data_loader.get_data_loader(
                            val_cid_list, claim_val_id, claim_val_pers,
                            model.hps.batch_size)
                    res = model.run_step(valid_val_iter, sess, is_train=False,
                                         freeze_layer=model.hps.use_pretrain)
                    loss = res['loss']
                    val_loss_before.append(loss)
                    #meta tuning
                    res_val_, val_batch_loss = do_learning_fix_step(
                        model, valid_train_iter, valid_val_iter, sess)
                    val_loss_meta.append(res_val_['loss'])
                    saver_.restore(sess, tf.train.latest_checkpoint('./save'))
                print("epoch: {}, fine tuning loss:{} ".format(
                    meta_iteration, np.mean(val_loss_meta)))
                if np.mean(val_loss_meta) < best_loss:
                    # Improvement: record it and checkpoint the meta-model.
                    best_loss = np.mean(val_loss_meta)
                    the_meta_model = train_savedir + 'MetaModel' + str(
                        meta_iteration)
                    model.saver.save(sess, the_meta_model)
                    print("save fine tuning model in {}".format(train_savedir))
                else:
                    stop_count += 1
                    if stop_count > patience:
                        print("loss has been rising, stop training")
                        break
def train(model, vocab, pretrain_vardicts=None):
    """Standard step-based training loop: run batches until the epoch budget is
    exhausted, logging to TensorBoard, validating every 5 steps, and
    checkpointing periodically.

    Args:
        model: model exposing ``hps``, ``run_step``, ``saver``.
        vocab: vocabulary passed to the batchers.
        pretrain_vardicts: optional pretrained weights applied when ``hps.use_pretrain``.
    """
    train_data_loader = Batcher(vocab, model.hps.data_path, args)
    valid_data_loader = Batcher(vocab, model.hps.data_path.replace('train_', 'dev_'), args)
    if model.hps.mode == 'lm_train':
        # The LM corpus uses a 'valid_' split instead of 'dev_'.
        valid_data_loader = Batcher(
            vocab, model.hps.data_path.replace('train_', 'valid_'), args)
    with tf.Session(config=utils.gpu_config()) as sess:
        train_logdir, dev_logdir = os.path.join(args.model_path,
                                                'logdir/train'), os.path.join(
                                                    args.model_path, 'logdir/dev')
        train_savedir = os.path.join(args.model_path, 'train/')
        print("[*] Train save directory is: {}".format(train_savedir))
        if not os.path.exists(train_logdir):
            os.makedirs(train_logdir)
        if not os.path.exists(dev_logdir):
            os.makedirs(dev_logdir)
        if not os.path.exists(train_savedir):
            os.makedirs(train_savedir)
        summary_writer1 = tf.summary.FileWriter(train_logdir, sess.graph)
        summary_writer2 = tf.summary.FileWriter(dev_logdir, sess.graph)
        """ Initialize with pretrain variables """
        if model.hps.use_pretrain:
            assign_ops, uninitialized_varlist = utils.assign_pretrain_weights(
                pretrain_vardicts)
            sess.run(assign_ops)
            sess.run(tf.initialize_variables(uninitialized_varlist))
        else:
            sess.run(tf.global_variables_initializer())
        # Running selection counts per latent matrix (used by the
        # 'posterior'/'embmin' model variants below).
        posterior = [0 for _ in range(model.hps.matrix_num)]
        prior = [0 for _ in range(model.hps.matrix_num)]
        step = 0
        while True:  # 6978 sample for one epoch
            beg_time = time()
            batch = train_data_loader.next_batch()
            # NOTE(review): hard-coded dataset sizes — 857899 for the LM corpus,
            # 6978 otherwise; confirm these match the actual data files.
            sample_per_epoch = 857899 if 'lm' in model.hps.mode else 6978
            if model.hps.mode == 'lm_train':
                res = model.run_step(batch, sess, is_train=True)
            else:
                # Freeze pretrained layers during (roughly) the first epoch.
                res = model.run_step(
                    batch,
                    sess,
                    is_train=True,
                    freeze_layer=(
                        model.hps.use_pretrain
                        and step < sample_per_epoch / model.hps.batch_size))
            loss, summaries, step = res['loss'], res['summaries'], res[
                'global_step']
            if model.hps.model == 'posterior':
                # Track which latent matrix each head ranks first, under both
                # the posterior and the prior distributions.
                gumbel = res['posterior']
                gumbel_prior = res['prior']
                selected = np.argsort(-gumbel)
                selected_poste = [int(el[0]) for el in selected]
                selected_prior = [
                    int(el[0]) for el in np.argsort(-gumbel_prior)
                ]
                posterior = [
                    el1 + el2 for el1, el2 in zip(posterior, selected_poste)
                ]
                prior = [el1 + el2 for el1, el2 in zip(prior, selected_prior)]
                print("prior: {} posterior: {}".format(prior, posterior))
            elif model.hps.model == 'embmin':
                # Histogram of selected embedding indices.
                dist = res['selected_emb_idx']
                for tmp in dist:
                    prior[tmp] += 1
                print(prior)
            end_time = time()
            print("{} epoch, {} step, {}sec, {} loss".format(
                int(step * model.hps.batch_size / sample_per_epoch), step,
                round(end_time - beg_time, 3), round(loss, 3)))
            summary_writer1.add_summary(summaries, step)
            if step % 5 == 0:
                # Validation pass; note this rebinds loss/summaries/step from
                # the dev batch for the logging below.
                dev_batch = valid_data_loader.next_batch()
                res = model.run_step(dev_batch, sess, is_train=False)
                loss, summaries, step = res['loss'], res['summaries'], res[
                    'global_step']
                assert step % 5 == 0
                print("[VALID] {} loss".format(round(loss, 3)))
                summary_writer2.add_summary(summaries, step)
            if step == 10 or step % 2000 == 0:
                model.saver.save(sess, train_savedir, global_step=step)
            if int(step * model.hps.batch_size /
                   sample_per_epoch) > model.hps.max_epoch:
                # Epoch budget reached: final checkpoint, then stop.
                model.saver.save(sess, train_savedir, global_step=step)
                print("training end")
                break