# Standard-library / TensorFlow imports used by the functions below; the
# project-specific utilities (ASR_align_DataSet, TextDataSet, TFData,
# PhoneClassifier, PhoneDiscriminator2, P_Ngram, read_ngram, ngram2kernel,
# train_step, train_D, train_G, train_G_supervised, evaluate, evaluation,
# decode) and the global `args` are expected to be imported/defined elsewhere
# in this repository.
from datetime import datetime
from time import time

import tensorflow as tf


def Decode(save_file):
    """Restore the latest PhoneClassifier checkpoint, evaluate on dev and
    decode the training set to 'output/<save_file>'."""
    dataset = ASR_align_DataSet(
        trans_file=args.dirs.train.trans,
        align_file=None,
        uttid2wav=args.dirs.train.wav_scp,
        feat_len_file=args.dirs.train.feat_len,
        args=args,
        _shuffle=False,
        transform=True)
    dataset_dev = ASR_align_DataSet(
        trans_file=args.dirs.dev.trans,
        align_file=args.dirs.dev.align,
        uttid2wav=args.dirs.dev.wav_scp,
        feat_len_file=args.dirs.dev.feat_len,
        args=args,
        _shuffle=False,
        transform=True)
    feature_dev = TFData(dataset=dataset_dev,
                         dir_save=args.dirs.dev.tfdata,
                         args=args).read()
    feature_dev = feature_dev.padded_batch(args.batch_size,
                                           ((), [None, args.dim_input]))

    G = PhoneClassifier(args)
    G.summary()
    optimizer_G = tf.keras.optimizers.Adam(1e-4)

    ckpt = tf.train.Checkpoint(G=G, optimizer_G=optimizer_G)
    ckpt_manager = tf.train.CheckpointManager(ckpt, args.dirs.checkpoint, max_to_keep=1)
    ckpt.restore(ckpt_manager.latest_checkpoint)
    print('checkpoint {} restored!!'.format(ckpt_manager.latest_checkpoint))

    fer, cer = evaluate(feature_dev, dataset_dev, args.data.dev_size, G)
    decode(dataset, G, args.idx2token, 'output/' + save_file)
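# The commented sketch below is not part of the original file: it only
# illustrates how Decode() and one of the train() variants are typically
# dispatched from a command-line entry point. The flag names ('mode', 'name')
# are assumptions for illustration, not this repository's actual interface.
#
#   if __name__ == '__main__':
#       if args.mode == 'decode':
#           Decode(save_file=args.name)
#       elif args.mode == 'train':
#           train()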
def train(Model):
    """EODM training: match the model's output n-gram statistics to the corpus
    n-gram statistics `py`, with an auxiliary `fs` loss and a small supervised
    batch (x_0, y_0)."""
    # load external LM
    with tf.device("/cpu:0"):
        dataset_dev = ASR_align_DataSet(
            file=[args.dirs.dev.data],
            args=args,
            _shuffle=False,
            transform=True)
        tfdata_train = TFData(dataset=None,
                              dataAttr=['feature', 'label', 'align'],
                              dir_save=args.dirs.train.tfdata,
                              args=args).read(_shuffle=False)
        tfdata_dev = TFData(dataset=None,
                            dataAttr=['feature', 'label', 'align'],
                            dir_save=args.dirs.dev.tfdata,
                            args=args).read(_shuffle=False)

        x_0, y_0, aligns_0 = next(iter(tfdata_train.take(args.num_supervised).\
            padded_batch(args.num_supervised, ([None, args.dim_input], [None], [None]))))
        iter_train = iter(tfdata_train.cache().repeat().shuffle(3000).\
            padded_batch(args.batch_size, ([None, args.dim_input], [None], [None])).prefetch(buffer_size=3))
        tfdata_dev = tfdata_dev.padded_batch(args.batch_size, ([None, args.dim_input], [None], [None]))

    # get dataset ngram
    ngram_py, total_num = read_ngram(args.data.k, args.dirs.ngram, args.token2idx, type='list')
    kernel, py = ngram2kernel(ngram_py, args)

    # create model parameters
    model = Model(args)
    compute_p_ngram = P_Ngram(kernel, args)
    model.summary()
    compute_p_ngram.summary()

    # build optimizer
    if args.opti.type == 'adam':
        optimizer = tf.keras.optimizers.Adam(args.opti.lr, beta_1=0.5, beta_2=0.9)
        # optimizer = tf.keras.optimizers.Adam(args.opti.lr*0.1, beta_1=0.5, beta_2=0.9)
    elif args.opti.type == 'sgd':
        optimizer = tf.keras.optimizers.SGD(lr=args.opti.lr, momentum=0.9, decay=0.98)

    writer = tf.summary.create_file_writer(str(args.dir_log))
    ckpt = tf.train.Checkpoint(model=model, optimizer=optimizer)
    ckpt_manager = tf.train.CheckpointManager(ckpt, args.dir_checkpoint, max_to_keep=5)
    step = 0

    # if a checkpoint exists, restore the latest checkpoint.
    if args.dirs.checkpoint:
        _ckpt_manager = tf.train.CheckpointManager(ckpt, args.dirs.checkpoint, max_to_keep=1)
        ckpt.restore(_ckpt_manager.latest_checkpoint)
        print('checkpoint {} restored!!'.format(_ckpt_manager.latest_checkpoint))
        step = int(_ckpt_manager.latest_checkpoint.split('-')[-1])

    start_time = datetime.now()
    num_processed = 0
    progress = 0

    # step = 1600
    while step < 99999999:
        start = time()

        x, _, aligns = next(iter_train)
        loss_EODM, loss_fs = train_step(x, aligns, py, model, compute_p_ngram, optimizer, args.lambda_fs)
        loss_supervise = train_G_supervised(x_0, y_0, model, optimizer, args.dim_output)
        num_processed += len(x)
        progress = num_processed / args.data.train_size

        if step % 10 == 0:
            print('EODM loss: {:.2f}\tloss_fs: {:.3f} * {}\tloss_supervise: {:.3f} * {}\tbatch: {} time: {:.2f} s {:.3f}% step: {}'.format(
                  loss_EODM, loss_fs, args.lambda_fs, loss_supervise, args.lambda_supervision,
                  x.shape, time()-start, progress*100.0, step))
            with writer.as_default():
                tf.summary.scalar("costs/loss_EODM", loss_EODM, step=step)
                tf.summary.scalar("costs/loss_fs", loss_fs, step=step)
                tf.summary.scalar("costs/loss_supervise", loss_supervise, step=step)
        if step % args.dev_step == 0:
            fer, cer = evaluation(tfdata_dev, args.data.dev_size, model)
            with writer.as_default():
                tf.summary.scalar("performance/fer", fer, step=step)
                tf.summary.scalar("performance/cer", cer, step=step)
        if step % args.decode_step == 0:
            decode(dataset_dev[0], model)
        if step % args.save_step == 0:
            save_path = ckpt_manager.save(step)
            print('save model {}'.format(save_path))

        step += 1

    print('training duration: {:.2f}h'.format((datetime.now()-start_time).total_seconds()/3600))
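# ---------------------------------------------------------------------------
# Illustrative sketch only (not from this repository): `train_step` above is
# assumed to push the model's output n-gram statistics towards the corpus
# n-gram distribution `py` (the EODM objective), using the convolution kernel
# built by `ngram2kernel`. The helper below shows the same idea for bigrams
# (k=2), computed directly from frame posteriors without the kernel trick.
def _eodm_bigram_loss_sketch(probs, py_bigram):
    """probs: [B, T, V] softmax outputs; py_bigram: [V, V] corpus bigram probs."""
    p_t, p_next = probs[:, :-1, :], probs[:, 1:, :]
    # expected bigram "counts" under the model, summed over batch and time
    p_model = tf.einsum('bti,btj->ij', p_t, p_next)
    p_model /= tf.reduce_sum(p_model)
    # cross-entropy between the corpus bigram statistics and the model estimate
    return -tf.reduce_sum(py_bigram * tf.math.log(p_model + 1e-8))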
def train():
    """Adversarial training: PhoneClassifier G against PhoneDiscriminator2 D
    on unpaired text (WGAN with gradient penalty `lambda_gp`), plus a small
    supervised batch (x_0, y_0)."""
    dataset_dev = ASR_align_DataSet(
        file=[args.dirs.dev.data], args=args, _shuffle=False, transform=True)
    with tf.device("/cpu:0"):
        # wav data
        tfdata_train = TFData(dataset=None,
                              dataAttr=['feature', 'label', 'align'],
                              dir_save=args.dirs.train.tfdata,
                              args=args).read(_shuffle=False)
        tfdata_dev = TFData(dataset=None,
                            dataAttr=['feature', 'label', 'align'],
                            dir_save=args.dirs.dev.tfdata,
                            args=args).read(_shuffle=False)

        x_0, y_0, _ = next(iter(tfdata_train.take(args.num_supervised).map(lambda x, y, z: (x, y, z[:args.max_seq_len])).\
            padded_batch(args.num_supervised, ([None, args.dim_input], [None], [None]))))
        iter_train = iter(tfdata_train.cache().repeat().shuffle(3000).map(lambda x, y, z: (x, y, z[:args.max_seq_len])).\
            padded_batch(args.batch_size, ([None, args.dim_input], [None], [args.max_seq_len])).prefetch(buffer_size=3))
        tfdata_dev = tfdata_dev.padded_batch(args.batch_size, ([None, args.dim_input], [None], [None]))

        # text data
        dataset_text = TextDataSet(
            list_files=[args.dirs.lm.data], args=args, _shuffle=True)
        tfdata_train_text = tf.data.Dataset.from_generator(
            dataset_text, (tf.int32), (tf.TensorShape([None])))
        iter_text = iter(tfdata_train_text.cache().repeat().shuffle(100).map(lambda x: x[:args.max_seq_len]).\
            padded_batch(args.batch_size, ([args.max_seq_len])).prefetch(buffer_size=5))

    # create model parameters
    G = PhoneClassifier(args)
    D = PhoneDiscriminator2(args)
    G.summary()
    D.summary()

    optimizer_G = tf.keras.optimizers.Adam(args.opti.G.lr, beta_1=0.5, beta_2=0.9)
    optimizer_D = tf.keras.optimizers.Adam(args.opti.D.lr, beta_1=0.5, beta_2=0.9)
    optimizer = tf.keras.optimizers.Adam(args.opti.G.lr, beta_1=0.5, beta_2=0.9)

    writer = tf.summary.create_file_writer(str(args.dir_log))
    ckpt = tf.train.Checkpoint(G=G, optimizer_G=optimizer_G)
    ckpt_manager = tf.train.CheckpointManager(ckpt, args.dir_checkpoint, max_to_keep=5)
    step = 0

    # if a checkpoint exists, restore the latest checkpoint.
    if args.dirs.checkpoint:
        _ckpt_manager = tf.train.CheckpointManager(ckpt, args.dirs.checkpoint, max_to_keep=1)
        ckpt.restore(_ckpt_manager.latest_checkpoint)
        print('checkpoint {} restored!!'.format(_ckpt_manager.latest_checkpoint))
        step = int(_ckpt_manager.latest_checkpoint.split('-')[-1])

    start_time = datetime.now()
    num_processed = 0
    progress = 0

    while step < 99999999:
        start = time()

        # update the discriminator D_G_rate times per generator update
        for _ in range(args.opti.D_G_rate):
            x, _, aligns = next(iter_train)
            text = next(iter_text)
            P_Real = tf.one_hot(text, args.dim_output)
            cost_D, gp = train_D(x, aligns, P_Real, text > 0, G, D, optimizer_D, args.lambda_gp)

        x, _, aligns = next(iter_train)
        cost_G, fs = train_G(x, aligns, G, D, optimizer_G, args.lambda_fs)
        loss_supervise = train_G_supervised(x_0, y_0, G, optimizer_G, args.dim_output)
        num_processed += len(x)
        progress = num_processed / args.data.train_size

        if step % 10 == 0:
            print('cost_G: {:.3f}|{:.3f}\tcost_D: {:.3f}|{:.3f}\tloss_supervise: {:.3f}\tbatch: {}|{}\tused: {:.3f}\t {:.3f}% iter: {}'.format(
                  cost_G, fs, cost_D, gp, loss_supervise, x.shape, text.shape,
                  time()-start, progress*100.0, step))
            with writer.as_default():
                tf.summary.scalar("costs/cost_G", cost_G, step=step)
                tf.summary.scalar("costs/cost_D", cost_D, step=step)
                tf.summary.scalar("costs/gp", gp, step=step)
                tf.summary.scalar("costs/fs", fs, step=step)
                tf.summary.scalar("costs/loss_supervise", loss_supervise, step=step)
        if step % args.dev_step == 0:
            fer, cer = evaluation(tfdata_dev, args.data.dev_size, G)
            with writer.as_default():
                tf.summary.scalar("performance/fer", fer, step=step)
                tf.summary.scalar("performance/cer", cer, step=step)
        if step % args.decode_step == 0:
            decode(dataset_dev[0], G)
        if step % args.save_step == 0:
            save_path = ckpt_manager.save(step)
            print('save model {}'.format(save_path))

        step += 1

    print('training duration: {:.2f}h'.format((datetime.now()-start_time).total_seconds()/3600))
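# ---------------------------------------------------------------------------
# Illustrative sketch only (not from this repository): `train_D` above is
# assumed to perform a WGAN-GP critic update on phone-posterior sequences,
# given that it returns a gradient-penalty term weighted by `args.lambda_gp`.
# The helper below shows such an update for pre-computed distributions,
# assuming the fake batch P_G (from the classifier) and the real batch P_Real
# (one-hot text) have already been padded to the same [B, T, V] shape.
@tf.function
def _train_D_sketch(P_G, P_Real, D, optimizer_D, lambda_gp):
    with tf.GradientTape() as tape:
        d_real = D(P_Real, training=True)
        d_fake = D(P_G, training=True)
        # gradient penalty on random interpolates between real and fake samples
        eps = tf.random.uniform([tf.shape(P_Real)[0], 1, 1], 0.0, 1.0)
        inter = eps * P_Real + (1.0 - eps) * P_G
        with tf.GradientTape() as gp_tape:
            gp_tape.watch(inter)
            d_inter = D(inter, training=True)
        grads = gp_tape.gradient(d_inter, inter)
        slopes = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=[1, 2]) + 1e-10)
        gp = tf.reduce_mean((slopes - 1.0) ** 2)
        cost_D = tf.reduce_mean(d_fake) - tf.reduce_mean(d_real) + lambda_gp * gp
    gradients = tape.gradient(cost_D, D.trainable_variables)
    optimizer_D.apply_gradients(zip(gradients, D.trainable_variables))
    return cost_D, gp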
def train():
    """Supervised training of PhoneClassifier, either on a fixed batch of
    `args.num_supervised` utterances or on the full training set."""
    dataset_dev = ASR_align_DataSet(
        file=[args.dirs.dev.data], args=args, _shuffle=False, transform=True)
    with tf.device("/cpu:0"):
        # wav data
        tfdata_train = TFData(dataset=None,
                              dataAttr=['feature', 'label', 'align'],
                              dir_save=args.dirs.train.tfdata,
                              args=args).read(_shuffle=False)
        tfdata_dev = TFData(dataset=None,
                            dataAttr=['feature', 'label', 'align'],
                            dir_save=args.dirs.dev.tfdata,
                            args=args).read(_shuffle=False)

        if args.num_supervised:
            x_0, y_0, aligns_0 = next(iter(tfdata_train.take(args.num_supervised).\
                padded_batch(args.num_supervised, ([None, args.dim_input], [None], [None]))))
        iter_train = iter(
            tfdata_train.cache().repeat().shuffle(500).padded_batch(
                args.batch_size,
                ([None, args.dim_input], [None], [None])).prefetch(buffer_size=5))
        tfdata_dev = tfdata_dev.padded_batch(
            args.batch_size, ([None, args.dim_input], [None], [None]))

    # create model parameters
    model = PhoneClassifier(args)
    model.summary()
    optimizer_G = tf.keras.optimizers.Adam(args.opti.lr, beta_1=0.5, beta_2=0.9)

    writer = tf.summary.create_file_writer(str(args.dir_log))
    ckpt = tf.train.Checkpoint(model=model, optimizer_G=optimizer_G)
    ckpt_manager = tf.train.CheckpointManager(ckpt, args.dir_checkpoint, max_to_keep=5)
    step = 0

    # if a checkpoint exists, restore the latest checkpoint.
    if args.dirs.checkpoint:
        _ckpt_manager = tf.train.CheckpointManager(ckpt, args.dirs.checkpoint, max_to_keep=1)
        ckpt.restore(_ckpt_manager.latest_checkpoint)
        print('checkpoint {} restored!!'.format(_ckpt_manager.latest_checkpoint))
        step = int(_ckpt_manager.latest_checkpoint.split('-')[-1])

    start_time = datetime.now()

    while step < 99999999:
        start = time()

        if args.num_supervised:
            x = x_0
            loss_supervise = train_G_supervised(x_0, y_0, model, optimizer_G, args.dim_output)
        else:
            x, y, aligns = next(iter_train)
            loss_supervise = train_G_supervised(x, y, model, optimizer_G, args.dim_output)

        if step % 10 == 0:
            print('loss_supervise: {:.3f}\tbatch: {}\tused: {:.3f}\tstep: {}'.format(
                  loss_supervise, x.shape, time() - start, step))
            with writer.as_default():
                tf.summary.scalar("costs/loss_supervise", loss_supervise, step=step)
        if step % args.dev_step == 0:
            fer, cer = evaluation(tfdata_dev, args.data.dev_size, model)
            with writer.as_default():
                tf.summary.scalar("performance/fer", fer, step=step)
                tf.summary.scalar("performance/cer", cer, step=step)
        if step % args.decode_step == 0:
            decode(dataset_dev[0], model)
        if step % args.save_step == 0:
            save_path = ckpt_manager.save(step)
            print('save model {}'.format(save_path))

        step += 1

    print('training duration: {:.2f}h'.format(
        (datetime.now() - start_time).total_seconds() / 3600))
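# ---------------------------------------------------------------------------
# Illustrative sketch only (not from this repository): `train_G_supervised`
# used in the training loops above is assumed to be a frame-level
# cross-entropy step between the classifier's outputs and frame-aligned
# labels. The helper below shows that step, assuming the labels share the
# time axis with the input features and that label id 0 marks padding.
@tf.function
def _train_G_supervised_sketch(x, labels, model, optimizer, dim_output):
    with tf.GradientTape() as tape:
        logits = model(x, training=True)  # [B, T, dim_output]
        ce = tf.nn.softmax_cross_entropy_with_logits(
            labels=tf.one_hot(labels, dim_output), logits=logits)
        mask = tf.cast(labels > 0, tf.float32)  # ignore padded frames
        loss = tf.reduce_sum(ce * mask) / tf.reduce_sum(mask)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss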