# Build the learning-rate schedule and optimizer, then train a
# MusicTransformer under a MirroredStrategy (multi-GPU data parallel).
# NOTE(review): `max_seq`, `epochs`, `batch_size`, `load_path`, `save_path`,
# `dataset`, and the imported modules are defined elsewhere in this file.
learning_rate = criterion.CustomSchedule(par.embedding_dim)
# BUG FIX: original passed undefined name `l_r`; the schedule was bound to
# `learning_rate` on the line above, so use that.
opt = Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

strategy = tf.distribute.MirroredStrategy()

# define model — variables must be created inside the strategy scope so
# they are mirrored across replicas.
with strategy.scope():
    mt = MusicTransformer(
        embedding_dim=256,
        vocab_size=par.vocab_size,
        num_layer=6,
        max_seq=max_seq,
        dropout=0.2,
        debug=False,
        loader_path=load_path)
    mt.compile(optimizer=opt, loss=criterion.transformer_dist_train_loss)

    # Train Start
    for e in range(epochs):
        mt.reset_metrics()
        for b in range(len(dataset.files) // batch_size):
            try:
                batch_x, batch_y = dataset.seq2seq_batch(batch_size, max_seq)
            except Exception:
                # FIX: was a bare `except:` which also swallowed
                # KeyboardInterrupt/SystemExit. Keep the best-effort skip of
                # bad batches, but only for ordinary exceptions.
                continue
            result_metrics = mt.train_on_batch(batch_x, batch_y)
            # Every 100 batches: evaluate on the eval split and checkpoint.
            if b % 100 == 0:
                eval_x, eval_y = dataset.seq2seq_batch(batch_size, max_seq, 'eval')
                eval_result_metrics = mt.evaluate(eval_x, eval_y)
                mt.save(save_path)
# Build the learning-rate schedule and optimizer, then train a
# MusicTransformer under a MirroredStrategy (multi-GPU data parallel).
# Same flow as the sibling script above, but sourcing the schedule and loss
# from the `callback` module.
# NOTE(review): `max_seq`, `epochs`, `batch_size`, `load_path`, `save_path`,
# `dataset`, and the imported modules are defined elsewhere in this file.
learning_rate = callback.CustomSchedule(par.embedding_dim)
# BUG FIX: original passed undefined name `l_r`; the schedule was bound to
# `learning_rate` on the line above, so use that.
opt = Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

strategy = tf.distribute.MirroredStrategy()

# define model — variables must be created inside the strategy scope so
# they are mirrored across replicas.
with strategy.scope():
    mt = MusicTransformer(
        embedding_dim=256,
        vocab_size=par.vocab_size,
        num_layer=6,
        max_seq=max_seq,
        dropout=0.2,
        debug=False,
        loader_path=load_path)
    mt.compile(optimizer=opt, loss=callback.transformer_dist_train_loss)

    # Train Start
    for e in range(epochs):
        mt.reset_metrics()
        for b in range(len(dataset.files) // batch_size):
            try:
                batch_x, batch_y = dataset.seq2seq_batch(batch_size, max_seq)
            except Exception:
                # FIX: was a bare `except:` which also swallowed
                # KeyboardInterrupt/SystemExit. Keep the best-effort skip of
                # bad batches, but only for ordinary exceptions.
                continue
            result_metrics = mt.train_on_batch(batch_x, batch_y)
            # Every 100 batches: evaluate on the eval split and checkpoint.
            if b % 100 == 0:
                eval_x, eval_y = dataset.seq2seq_batch(batch_size, max_seq, 'eval')
                eval_result_metrics = mt.evaluate(eval_x, eval_y)
                mt.save(save_path)
# Single-process eager-mode training/debug script for MusicTransformer.
import params as par
# FIX: `tf` was referenced below but never imported — NameError at runtime.
import tensorflow as tf
from tensorflow.python import enable_eager_execution
from tensorflow.python.keras.optimizer_v2.adam import Adam
from tensorflow.python.keras.optimizer_v2.gradient_descent import SGD
from data import Data
import utils
# NOTE(review): `MusicTransformer` and `callback` are used below but not
# imported here — presumably provided by a missing import (e.g. the model
# module); confirm and add the correct import.

enable_eager_execution()
# NOTE(review): executing_eagerly() only *queries* eager status and its
# result was discarded in the original; kept for parity, but it has no
# effect and could be removed or asserted.
tf.executing_eagerly()

if __name__ == '__main__':
    epoch = 100
    batch = 1000
    dataset = Data('dataset/processed/')

    opt = Adam(0.0001)
    # opt = SGD(lr=0.0001, momentum=0.0, decay=0.0, nesterov=False)

    # debug=True: small fixed max_seq for quick iteration.
    mt = MusicTransformer(
        embedding_dim=par.embedding_dim,
        vocab_size=par.vocab_size,
        num_layer=6,
        max_seq=100,
        debug=True)
    mt.compile(optimizer=opt, loss=callback.TransformerLoss())

    for e in range(epoch):
        for b in range(batch):
            batch_x, batch_y = dataset.seq2seq_batch(2, 100)
            result_metrics = mt.train_on_batch(batch_x, batch_y)
            print('Loss: {:6.6}, Accuracy: {:3.2}'.format(
                result_metrics[0], result_metrics[1]))