def train_epochs(train_dataset, val_dataset, model, epochs=50):
    """Run a custom train/validation loop and save the model afterwards.

    Args:
        train_dataset: iterable of (images, labels) training batches.
        val_dataset: iterable of (images, labels) validation batches.
        model: tf.keras model to train; saved to 'mnist.h5' at the end.
        epochs: number of passes over the training data.

    Returns:
        dict with per-epoch 'loss', 'acc', 'val_loss', 'val_acc' lists
        (plain Python floats).
    """
    EPOCHS = epochs
    INIT_LR = 1e-3  # initial learning rate, decayed linearly over the run

    # FIX: 'lr' is a deprecated alias for 'learning_rate' in tf.keras.
    # NOTE(review): 'decay' is the legacy per-step decay argument — confirm
    # the installed Keras version still supports it.
    optimizer = Adam(learning_rate=INIT_LR, decay=INIT_LR / EPOCHS)
    loss_object = losses.SparseCategoricalCrossentropy(name='train_loss')

    # FIX: the mean-loss metric name was misspelled 'rain_loss'.
    train_loss = metrics.Mean(name='train_loss')
    train_accuracy = metrics.SparseCategoricalAccuracy(name='train_acc')
    val_loss = metrics.Mean(name='val_loss')
    val_accuracy = metrics.SparseCategoricalAccuracy(name='val_acc')

    history = {'loss': [], 'acc': [], 'val_loss': [], 'val_acc': []}
    for epoch in range(EPOCHS):
        sys.stdout.flush()
        epochStart = time.time()

        # Streaming metrics accumulate across calls; reset so each epoch
        # reports fresh values.
        train_loss.reset_states()
        train_accuracy.reset_states()
        val_loss.reset_states()
        val_accuracy.reset_states()

        for images, labels in train_dataset:
            train_step(images, labels, model, loss_object, optimizer,
                       train_loss, train_accuracy)
        for val_images, val_labels in val_dataset:
            val_step(val_images, val_labels, model, loss_object,
                     val_loss, val_accuracy)

        template = 'Epoch {}/{}, loss: {}, acc: {}, val_loss: {}, val_acc: {}'
        print(
            template.format(epoch + 1, EPOCHS, train_loss.result(),
                            train_accuracy.result() * 100,
                            val_loss.result(),
                            val_accuracy.result() * 100))

        # Store plain floats rather than tensors so the history is easy to
        # serialize/plot after the session ends.
        history['loss'].append(float(train_loss.result()))
        history['acc'].append(float(train_accuracy.result()))
        history['val_loss'].append(float(val_loss.result()))
        history['val_acc'].append(float(val_accuracy.result()))

        # Show timing information for the epoch.
        epochEnd = time.time()
        elapsed = (epochEnd - epochStart) / 60.0
        print("took {:.4} minutes".format(elapsed))

    # Save weights.
    model.save('mnist.h5')
    return history
def train_model(model, train_data, train_len, dev_data, args):
    """Custom training loop with mid-epoch logging and periodic validation.

    Args:
        model: model forwarded to train_step/valid_step.
        train_data: iterable of (input_ids, input_mask, labels) batches.
        train_len: number of training examples; used to derive the
            validation interval (roughly twice per epoch).
        dev_data: validation batches with the same structure as train_data.
        args: namespace providing lr, epochs, batch_size, model_save_path.
    """
    optimizer = optimizers.Adam(learning_rate=args.lr)
    train_loss = metrics.Mean(name='train_loss')
    train_metric = metrics.SparseCategoricalAccuracy(name='train_accuracy')
    valid_loss = metrics.Mean(name='valid_loss')
    valid_metric = metrics.SparseCategoricalAccuracy(name='valid_accuracy')
    loss_fun = losses.SparseCategoricalCrossentropy()
    step = 0
    best_valid_acc = 0
    for epoch in tf.range(args.epochs):
        for input_ids, input_mask, labels in train_data:
            train_step(model, input_ids, input_mask, labels, optimizer,
                       train_loss, train_metric, loss_fun)
            step += 1
            # print('step',step)
            # Log every 100 steps, except on steps where validation runs
            # (those get the fuller log block below).
            # NOTE(review): if train_len / batch_size / 2 < 100 the int(...)
            # factor is 0 and the modulo raises ZeroDivisionError — confirm
            # the datasets are always large enough.
            if step % 100 == 0 and step % (
                    (int(train_len / args.batch_size / 2 / 100)) * 100) != 0:
                logs = 'Epoch={},step={},Loss:{},Accuracy:{},Valid Loss:{},Valid Accuracy:{},best_valid_acc:{}'
                tf.print(
                    tf.strings.format(
                        logs,
                        (epoch, step, train_loss.result(),
                         train_metric.result(), valid_loss.result(),
                         valid_metric.result(), best_valid_acc)))
                tf.print("")
            # Run a full validation pass roughly twice per epoch.
            if step % (
                    (int(train_len / args.batch_size / 2 / 100)) * 100) == 0:
                for input_ids, input_mask, labels in dev_data:
                    valid_step(model, input_ids, input_mask, labels,
                               optimizer, valid_loss, valid_metric, loss_fun)
                # Persist weights only when validation accuracy improves.
                if valid_metric.result() >= best_valid_acc:
                    best_valid_acc = valid_metric.result()
                    save_path = args.model_save_path
                    # model.save(save_path,save_format='h5')
                    # model.save(save_path, save_format='tf')
                    model.save_weights(save_path, save_format='tf')
                logs = 'Epoch={},step={},Loss:{},Accuracy:{},Valid Loss:{},Valid Accuracy:{},best_valid_acc:{}'
                printbar()
                tf.print(
                    tf.strings.format(
                        logs,
                        (epoch, step, train_loss.result(),
                         train_metric.result(), valid_loss.result(),
                         valid_metric.result(), best_valid_acc)))
                tf.print("")
                # Reset streaming metrics after each validation pass.
                train_loss.reset_states()
                train_metric.reset_states()
                valid_loss.reset_states()
                valid_metric.reset_states()
def run(self):
    """Train self.model on (inputTrain, outputTrain) for self.epoch epochs.

    Returns an error string if the configured loss/optimizer is unavailable,
    otherwise a summary string with train (and optionally test) metrics.
    Saves the model via self.saveModel() on success.
    """
    train_loss = metrics.Mean(name='train_loss')
    train_accuracy = metrics.SparseCategoricalAccuracy(
        name='train_accuracy')
    self.chooseLoss()
    self.chooseOptimizer()
    if self.loss_object is None:
        return "Error choosing cost function in " + self.frame + ": " + self.loss + " not available in TensorFlow"
    if self.optimizer_object is None:
        return "Error choosing optimizer in " + self.frame + ": " + self.optimizer + " not available in TensorFlow"
    for epoch in range(self.epoch):
        # FIX: the tape must be created inside the loop. A non-persistent
        # GradientTape only allows a single gradient() call, so the original
        # single tape wrapped around the whole epoch loop would raise on the
        # second iteration; the forward pass must also be recorded each step.
        with GradientTape() as tape:
            predictions = self.model(self.inputTrain)
            loss = self.loss_object(self.outputTrain, predictions)
        gradients = tape.gradient(loss, self.model.trainable_variables)
        self.optimizer_object.apply_gradients(
            zip(gradients, self.model.trainable_variables))
        train_loss(loss)
        train_accuracy(self.outputTrain, predictions)
    result = "Train --> Loss: " + str(
        train_loss.result()) + ", Accuracy: " + str(
            train_accuracy.result() * 100)
    if self.test is True:
        test_loss = metrics.Mean(name='test_loss')
        test_accuracy = metrics.SparseCategoricalAccuracy(
            name='test_accuracy')
        predictions = self.model(self.inputTest)
        t_loss = self.loss_object(self.outputTest, predictions)
        test_loss(t_loss)
        test_accuracy(self.outputTest, predictions)
        result = result + "Test --> Loss: " + str(
            test_loss.result()) + ", Accuracy: " + str(
                test_accuracy.result() * 100)
    self.saveModel()
    self.logger("Trained " + self.frame + " model saved correctly!")
    return result
def train():
    """Build, compile, and fit a TCN text classifier (2 classes)."""
    num_words = 20000        # vocabulary size for the embedding
    sequence_length = 100    # tokens per example
    depth = 6                # number of TCN blocks
    filters = 64
    channels = 128           # embedding dimension
    block_filters = [filters] * depth
    num_classes = 2

    # Embedding -> TCN stack -> softmax head.
    inputs = layers.Input(shape=(sequence_length, ), name="inputs")
    embedded = layers.Embedding(num_words, channels)(inputs)
    encoded = tcn.TCN(block_filters, kernel_size=8)(embedded)
    outputs = layers.Dense(num_classes, activation="softmax",
                           name="output")(encoded)
    model = Model(inputs, outputs)

    model.compile(optimizer="Adam",
                  metrics=[metrics.SparseCategoricalAccuracy()],
                  loss=losses.SparseCategoricalCrossentropy())
    print(model.summary())

    train_dataset, test_dataset = load_dataset(num_words, sequence_length)
    run_dir = str(Path("logs") / datetime.now().strftime("%Y-%m-%dT%H-%M_%S"))
    model.fit(train_dataset.batch(32),
              validation_data=test_dataset.batch(32),
              callbacks=[TensorBoard(run_dir)],
              epochs=5)
def train():
    """Train a U-Net on the Oxford-IIIT Pet dataset and return the model."""
    # 4-level U-Net, 64 filters at the root; 'same' padding keeps the output
    # resolution equal to the input.
    unet_model = unet.build_model(*oxford_iiit_pet.IMAGE_SIZE,
                                  channels=oxford_iiit_pet.channels,
                                  num_classes=oxford_iiit_pet.classes,
                                  layer_depth=4,
                                  filters_root=64,
                                  padding="same")
    unet.finalize_model(unet_model,
                        loss=losses.SparseCategoricalCrossentropy(),
                        metrics=[metrics.SparseCategoricalAccuracy()],
                        auc=False,
                        learning_rate=LEARNING_RATE)

    train_dataset, validation_dataset = oxford_iiit_pet.load_data()
    trainer = unet.Trainer(name="oxford_iiit_pet")
    trainer.fit(unet_model,
                train_dataset,
                validation_dataset,
                epochs=25,
                batch_size=1)
    return unet_model
def train():
    """Train a TCN on length-601 sequences (10 classes, per-step output).

    Uses return_sequence=True so the model emits a prediction per time step.
    Logs to a timestamped TensorBoard directory under logs/.
    """
    depth = 6
    filters = 25
    block_filters = [filters] * depth  # same filter count in every block
    sequence_length = 601

    train_dataset, test_dataset = load_dataset(30000, sequence_length)
    model = tcn.build_model(sequence_length=sequence_length,
                            channels=1,
                            num_classes=10,
                            filters=block_filters,
                            kernel_size=8,
                            return_sequence=True)
    # FIX: 'lr' is a deprecated alias for 'learning_rate' and has been
    # removed in recent Keras releases.
    model.compile(optimizer=optimizers.RMSprop(learning_rate=5e-4,
                                               clipnorm=1.),
                  metrics=[metrics.SparseCategoricalAccuracy()],
                  loss=losses.SparseCategoricalCrossentropy())
    print(model.summary())
    model.fit(train_dataset.batch(32),
              validation_data=test_dataset.batch(32),
              callbacks=[
                  TensorBoard(
                      str(
                          Path("logs") /
                          datetime.now().strftime("%Y-%m-%dT%H-%M_%S")))
              ],
              epochs=10)
def train():
    """Train a TCN classifier over flattened 28x28 images (10 classes)."""
    depth = 6
    filters = 25
    stack = [filters] * depth  # identical filter count in each TCN block

    model = tcn.build_model(sequence_length=28 * 28,
                            channels=1,
                            num_classes=10,
                            filters=stack,
                            kernel_size=8)
    model.compile(optimizer="Adam",
                  metrics=[metrics.SparseCategoricalAccuracy()],
                  loss=losses.SparseCategoricalCrossentropy())
    print(model.summary())

    train_dataset, test_dataset = load_dataset()
    run_dir = str(Path("logs") / datetime.now().strftime("%Y-%m-%dT%H-%M_%S"))
    model.fit(train_dataset.batch(32),
              validation_data=test_dataset.batch(32),
              callbacks=[TensorBoard(run_dir)],
              epochs=10)
def compile_model(model):
    """Compile *model* with Nadam, sparse CE loss, top-1 and top-5 accuracy.

    Returns the same model instance for call chaining.
    """
    tracked_metrics = [
        metrics.SparseCategoricalAccuracy(),
        metrics.SparseTopKCategoricalAccuracy(5),
    ]
    model.compile(optimizer=optimizers.Nadam(),
                  loss=losses.SparseCategoricalCrossentropy(),
                  metrics=tracked_metrics)
    return model
def model_fn():
    """Factory wrapping a fresh Keras model for TFF federated learning."""
    # A new Keras model must be built on every invocation — TFF calls this
    # factory within different graph contexts, so capturing a model from an
    # external scope would break.
    keras_model = create_keras_model()
    federated_model = tff.learning.from_keras_model(
        keras_model,
        input_spec=preprocessed_sample_dataset.element_spec,
        loss=losses.SparseCategoricalCrossentropy(),
        metrics=[metrics.SparseCategoricalAccuracy()])
    return federated_model
def __init__(self,
             model,
             learning_rate=0.001,
             checkpoint_dir="model/checkpoints"):
    """Set up checkpointed training state for *model*.

    Args:
        model: the tf.keras model to train.
        learning_rate: Adam learning rate.
        checkpoint_dir: directory for up to 3 rolling checkpoints.
    """
    # Track epoch counter, best validation loss, optimizer slots, and model
    # weights in one checkpoint object so training can resume exactly.
    self.ckpt = tf.train.Checkpoint(
        epoch=tf.Variable(1),
        val_loss=tf.Variable(np.inf),
        # FIX: 'lr' is a deprecated alias for 'learning_rate' and has been
        # removed in recent Keras releases.
        optimizer=optimizers.Adam(learning_rate=learning_rate),
        model=model)
    self.ckpt_manager = tf.train.CheckpointManager(
        checkpoint=self.ckpt, directory=checkpoint_dir, max_to_keep=3)
    self.restore_checkpoint()
    self.loss_fn = losses.SparseCategoricalCrossentropy(from_logits=True)
    # Index 0: running mean loss, index 1: accuracy — same layout for both.
    self.train_metrics = [
        metrics.Mean(), metrics.SparseCategoricalAccuracy()
    ]
    self.val_metrics = [
        metrics.Mean(), metrics.SparseCategoricalAccuracy()
    ]
def _define_classifier(self, ):
    """Build and compile the classification head on the frozen encoder."""
    seq_input = keras.Input(shape=(self.sequence_length, self.num_features))
    features = self.encoder(seq_input)
    features = layers.Flatten()(features)
    features = layers.Dense(200, activation='tanh')(features)
    features = layers.Dense(64, activation='tanh')(features)
    class_probs = layers.Dense(self.num_class,
                               activation='softmax')(features)
    self.classifier = keras.Model(seq_input, class_probs)

    # Freeze the encoder so only the dense head is trained.
    self.encoder.trainable = False
    self.classifier.compile(
        optimizer=keras.optimizers.Adam(learning_rate=self.learning_rate),
        loss=losses.SparseCategoricalCrossentropy(),
        metrics=[metrics.SparseCategoricalAccuracy()])
    self.classifier.summary()
def train(): depth = 6 filters = 25 block_filters = [filters] * depth print(block_filters) model = build_model(sequence_length=28 * 28, channels=1, num_classes=10, filters=block_filters, kernel_size=8) model.compile(optimizer="Adam", metrics=[metrics.SparseCategoricalAccuracy()], loss=losses.SparseCategoricalCrossentropy()) print(model.summary()) #train_dataset, test_dataset = load_dataset() """
model = Sequential([ layers.Dense(25, activation='sigmoid'), # 隐藏层1, 2 => 25 layers.Dense(50, activation='sigmoid'), # 隐藏层2, 25 => 50 layers.Dense(25, activation='sigmoid'), # 隐藏层3, 50 => 25 layers.Dense(2, activation=None) # 输出层, 25 => 2 ]) model.build(input_shape=(None, 2)) model.summary() optimizer = tf.keras.optimizers.SGD(learning_rate=0.1) train_db = tf.data.Dataset.from_tensor_slices((X_train, y_train)) train_db = train_db.batch(32) # 单个训练 train_mses, test_mses = [], [] train_accs = metrics.SparseCategoricalAccuracy() accs = [] for epoch in range(200): for step, (x, y) in enumerate(train_db): y_onehot = tf.one_hot(y, depth=2) with tf.GradientTape() as tape: out = model(x) loss = tf.keras.losses.categorical_crossentropy(y_onehot, out, from_logits=True) loss = tf.reduce_mean(loss)
# Distribute fine-tuning across the listed devices with synchronous
# data-parallel replication.
strategy = tf.distribute.MirroredStrategy(devices=devices)
print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
print(
    "============================ Loading model from pretrained and compiling ==========================="
)
with strategy.scope():
    tokenizer = GPT2TokenizerFast.from_pretrained(model_name)
    # GPT-2 ships without a pad token; reuse EOS for padding.
    tokenizer.pad_token = tokenizer.eos_token
    print("========================= Loading dataset ========================")
    train_dataset = tokenize(get_dataset(train_file), tokenizer,
                             truncate).batch(num_gpus)
    valid_dataset = tokenize(get_dataset(valid_file), tokenizer,
                             truncate).batch(num_gpus)
    model = TFGPT2LMHeadModel.from_pretrained(model_name)
    # Disable past key values
    model.config.use_cache = False
    optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5)
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    metric = metrics.SparseCategoricalAccuracy(name='Accuracy')
    # The LM head model exposes one logits output plus n_layer extra
    # outputs; only the first output receives a loss, so the loss list is
    # padded with None for the rest.
    model.compile(optimizer=optimizer,
                  loss=[loss, *[None] * model.config.n_layer],
                  metrics=[metric])
print(
    "========================= Finetuning Model =================================="
)
model.fit(train_dataset, batch_size=64, epochs=num_epochs)
print(
    "========================= Evaluating Model =================================="
)
model.evaluate(valid_dataset)
def train(args) -> None:
    """Start training based on args input.

    Builds the dataset (optionally a five-fold split), compiles the model
    with segmentation metrics, wires up checkpoint/TensorBoard/early-stop
    callbacks, optionally resumes from a checkpoint, and runs model.fit.
    """
    # Check if GPU is available
    print("\nNum GPUs Available: %d\n"\
        % (len(tf.config.list_physical_devices('GPU'))))

    # Set tf.keras mixed precision to float16
    set_keras_mixed_precision_policy('mixed_float16')

    # Create dataset
    save_svs_file, save_train_file, save_val_file \
        = generate_dataset(args.data_dir_AD, args.data_dir_control,
                           args.patch_size, force_regenerate=False)
    if args.fold_num != 0:  # If using five-fold cross-validation
        save_svs_file, save_train_file, save_val_file \
            = generate_five_fold_dataset(args.data_dir_AD,
                                         args.data_dir_control,
                                         args.patch_size, args.fold_num)
    # Load dataset
    train_dataset, val_dataset, class_weight \
        = load_dataset(save_svs_file, save_train_file, save_val_file,
                       args.batch_size)

    # Create network model
    model = get_model(args.model)
    #model.summary(120)
    #print(keras.backend.floatx())
    class_names = ['Background', 'Gray Matter', 'White Matter']
    # Segmentation is 3-class; IoU/pixel/mean-accuracy metrics all use
    # num_classes=3 to match.
    model.compile(optimizer=optimizers.Adam(),
                  loss=get_loss_func(args.loss_func,
                                     class_weight,
                                     gamma=args.focal_loss_gamma),
                  metrics=[metrics.SparseCategoricalAccuracy(),
                           SparseMeanIoU(num_classes=3, name='IoU/Mean'),
                           SparsePixelAccuracy(num_classes=3,
                                               name='PixelAcc'),
                           SparseMeanAccuracy(num_classes=3,
                                              name='MeanAcc'),
                           SparseFreqIoU(num_classes=3,
                                         name='IoU/Freq_weighted'),
                           SparseConfusionMatrix(num_classes=3, name='cm')] \
                      + SparseIoU.get_iou_metrics(num_classes=3,
                                                  class_names=class_names))

    # Create another checkpoint/log folder for model.name and timestamp
    args.ckpt_dir = os.path.join(args.ckpt_dir,
                                 model.name+'-'+args.file_suffix)
    args.log_dir = os.path.join(args.log_dir, 'fit',
                                model.name+'-'+args.file_suffix)
    if args.fold_num != 0:  # If using five-fold cross-validation
        args.ckpt_dir += f'_fold_{args.fold_num}'
        args.log_dir += f'_fold_{args.fold_num}'

    # Check if resume from training
    initial_epoch = 0
    if args.ckpt_filepath is not None:
        if args.ckpt_weights_only:
            if args.ckpt_filepath.endswith('.index'):
                # Get rid of the suffix
                args.ckpt_filepath = args.ckpt_filepath.replace('.index', '')
            model.load_weights(
                args.ckpt_filepath).assert_existing_objects_matched()
            print('Model weights loaded')
        else:
            model = load_whole_model(args.ckpt_filepath)
            print('Whole model (weights + optimizer state) loaded')
        # Checkpoint filenames look like 'cp-{epoch:03d}-...'; the epoch is
        # the second dash-separated field of the basename.
        initial_epoch = int(args.ckpt_filepath.split('/')[-1]\
            .split('-')[1])
        # Save in same checkpoint_dir but different log_dir (add current time)
        args.ckpt_dir = os.path.abspath(
            os.path.dirname(args.ckpt_filepath))
        args.log_dir = args.ckpt_dir.replace(
            'checkpoints', 'tf_logs/fit') + f'-retrain_{args.file_suffix}'

    # Write configurations to log_dir
    log_configs(args.log_dir, save_svs_file, train_dataset, val_dataset,
                args)
    # Create checkpoint directory
    if not os.path.exists(args.ckpt_dir):
        os.makedirs(args.ckpt_dir)
    # Create log directory
    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir)

    # Create a callback that saves the model's weights every 1 epoch.
    # The metric embedded in the filename differs depending on whether a
    # validation set exists.
    if val_dataset:
        ckpt_path = os.path.join(
            args.ckpt_dir, 'cp-{epoch:03d}-{val_IoU/Mean:.4f}.ckpt')
    else:
        ckpt_path = os.path.join(
            args.ckpt_dir, 'cp-{epoch:03d}-{IoU/Mean:.4f}.ckpt')
    cp_callback = callbacks.ModelCheckpoint(
        filepath=ckpt_path,
        verbose=1,
        save_weights_only=args.ckpt_weights_only,
        save_freq='epoch')

    # Create a TensorBoard callback
    tb_callback = callbacks.TensorBoard(
        log_dir=args.log_dir,
        histogram_freq=1,
        write_graph=True,
        write_images=False,
        update_freq='batch',
        profile_batch='100, 120')

    # Create a Lambda callback for plotting confusion matrix
    cm_callback = get_cm_callback(args.log_dir, class_names)

    # Create a TerminateOnNaN callback
    nan_callback = callbacks.TerminateOnNaN()

    # Create an EarlyStopping callback (only meaningful with validation data)
    if val_dataset:
        es_callback = callbacks.EarlyStopping(monitor='val_IoU/Mean',
                                              min_delta=0.01,
                                              patience=3,
                                              verbose=1,
                                              mode='max')

    if val_dataset:
        model.fit(
            train_dataset,
            epochs=args.num_epochs,
            steps_per_epoch=len(train_dataset) \
                if args.steps_per_epoch == -1 else args.steps_per_epoch,
            initial_epoch=initial_epoch,
            validation_data=val_dataset,
            validation_steps=len(val_dataset) // args.val_subsplits \
                if args.val_steps == -1 else args.val_steps,
            callbacks=[cp_callback, tb_callback, nan_callback,
                       cm_callback, es_callback])
    else:
        model.fit(
            train_dataset,
            epochs=args.num_epochs,
            steps_per_epoch=len(train_dataset) \
                if args.steps_per_epoch == -1 else args.steps_per_epoch,
            initial_epoch=initial_epoch,
            callbacks=[cp_callback, tb_callback, nan_callback,
                       cm_callback])
    # TODO: Switch to tf.data
    print('Training finished!')
x = tf.reshape(x, (-1, 28, 28, 1)) x = self.conv1(x) x = self.flatten(x) x = self.fc1(x) y = self.fc2(x) return y network = Network() # 确定目标损失函数、优化器、评价标准 loss_object = losses.SparseCategoricalCrossentropy() optimizer = optimizers.Adam() # 训练集上的损失值、精确度 train_loss = metrics.Mean(name='train_loss') train_accuracy = metrics.SparseCategoricalAccuracy(name='train_accuracy') # 测试集上的损失值、精确度 test_loss = metrics.Mean(name='test_loss') test_accuracy = metrics.SparseCategoricalAccuracy(name='test_accuracy') # 训练 def train_step(images, labels): with tf.GradientTape() as tape: # 建立梯度环境 predictions = network(images) # 前向计算 loss = loss_object(labels, predictions) # 计算损失 gradients = tape.gradient(loss, network.trainable_variables) # 计算网络中各个参数的梯度 optimizer.apply_gradients(zip(gradients, network.trainable_variables)) # 更新网络参数 train_loss(loss) # 计算训练损失
def train(start_step=0, restore=False):
    """Train the captcha-style classifier with checkpointing and TensorBoard.

    Args:
        start_step: checkpoint index to restore (also the starting epoch).
        restore: when True, attempt to restore from 'model/ckpt-{start_step}'
            and fall back to the latest checkpoint.
    """
    batch_size = 64
    epochs = 10
    train_ds, test_ds = load_data(batch_size)
    # Keep one fixed set of test images for per-epoch sample visualization.
    sample_images = tf.concat([images for images, _ in test_ds],
                              axis=0).numpy()
    model = make_model()
    model.summary()
    # Exponentially decaying learning rate: x0.99 every 100 steps.
    learning_rate = optimizers.schedules.ExponentialDecay(
        initial_learning_rate=0.0001,
        decay_steps=100,
        decay_rate=0.99,
        staircase=True)
    optimizer = optimizers.Adam(learning_rate=learning_rate)
    checkpoint = tf.train.Checkpoint(step=tf.Variable(0),
                                     optimizer=optimizer,
                                     net=model)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint,
                                                    'model/',
                                                    max_to_keep=3)
    if restore:
        try:
            checkpoint.restore(f'model/ckpt-{start_step}')
            print(f"Restored from model/ckpt-{start_step}")
        except tf.errors.NotFoundError:
            # Requested checkpoint missing: fall back to the newest one.
            checkpoint.restore(checkpoint_manager.latest_checkpoint)
            if checkpoint_manager.latest_checkpoint:
                start_step = checkpoint.step.numpy()
                print("Restored from {}".format(
                    checkpoint_manager.latest_checkpoint))
            else:
                start_step = 0
                print("Initializing from scratch.")
    # Model outputs probabilities (from_logits=False).
    loss_object = losses.SparseCategoricalCrossentropy(from_logits=False)
    train_loss = metrics.Mean(name='train_loss')
    train_accuracy = metrics.SparseCategoricalAccuracy(
        name='train_accuracy')

    # Fixed input signature: 20x20 grayscale uint8 images, int32 labels.
    @tf.function(input_signature=[
        tf.TensorSpec(shape=(None, 20, 20, 1),
                      dtype=tf.uint8,
                      name="train_images"),
        tf.TensorSpec(shape=(None, ), dtype=tf.int32, name="train_labels")
    ])
    def train_step(images, labels):
        # One optimization step: forward, backprop, metric update.
        with tf.GradientTape() as tape:
            predictions = model(images)
            loss = loss_object(labels, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients,
                                      model.trainable_variables))
        train_loss(loss)
        train_accuracy(labels, predictions)

    test_loss = metrics.Mean(name='test_loss')
    test_accuracy = metrics.SparseCategoricalAccuracy(name='test_accuracy')

    @tf.function(input_signature=[
        tf.TensorSpec(shape=(None, 20, 20, 1),
                      dtype=tf.uint8,
                      name="test_images"),
        tf.TensorSpec(shape=(None, ), dtype=tf.int32, name="test_labels")
    ])
    def test_step(images, labels):
        # Evaluation step: no gradients, just metric accumulation.
        predictions = model(images)
        test_loss(loss_object(labels, predictions))
        test_accuracy(labels, predictions)

    # One summary writer per log stream (run metadata, train, test, samples).
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    log_info = f"log/{current_time}/info"
    log_train = f"log/{current_time}/train"
    log_test = f"log/{current_time}/test"
    log_sample = f"log/{current_time}/sample"
    summary_writer_info = tf.summary.create_file_writer(log_info)
    summary_writer_train = tf.summary.create_file_writer(log_train)
    summary_writer_test = tf.summary.create_file_writer(log_test)
    summary_writer_sample = tf.summary.create_file_writer(log_sample)
    # Class alphabet used to render predicted labels in the sample image.
    string = "23456789ABCDEFGHJKLMNPQRSTUVWXYZ"
    with summary_writer_info.as_default():
        # Trace one forward pass so the graph shows up in TensorBoard.
        tf.summary.trace_on(graph=True, profiler=True)

        @tf.function(input_signature=[
            tf.TensorSpec(shape=(None, 20, 20, 1),
                          dtype=tf.uint8,
                          name="sample_images"),
            tf.TensorSpec(shape=(None, ),
                          dtype=tf.int32,
                          name="sample_labels")
        ])
        def create_graph(images, labels):
            predictions = model(images)
            return tf.reduce_mean(loss_object(labels, predictions))

        _ = create_graph(tf.zeros((batch_size, 20, 20, 1), tf.uint8),
                         tf.zeros((batch_size, ), tf.int32))
        tf.summary.trace_export(name="graph",
                                step=0,
                                profiler_outdir=log_info)
        tf.summary.trace_off()
    for epoch in range(start_step, epochs):
        for train_images, train_labels in train_ds:
            train_step(train_images, train_labels)
        for test_images, test_labels in test_ds:
            test_step(test_images, test_labels)
        print(
            f"Epoch {epoch + 1}, Loss: {train_loss.result()}, Accuracy: {train_accuracy.result() * 100}%, Test Loss: {test_loss.result()}, Test Accuracy: {test_accuracy.result() * 100}%"
        )
        with summary_writer_train.as_default():
            tf.summary.scalar('loss', train_loss.result(), step=epoch)
            tf.summary.scalar('accuracy',
                              train_accuracy.result(),
                              step=epoch)
        with summary_writer_test.as_default():
            tf.summary.scalar('loss', test_loss.result(), step=epoch)
            tf.summary.scalar('accuracy', test_accuracy.result(),
                              step=epoch)
        with summary_writer_sample.as_default():
            # Render predictions on the fixed sample batch into one image.
            sample_predictions = tf.argmax(model(sample_images),
                                           axis=1).numpy()
            sample = visualize(sample_images, sample_predictions,
                               lambda i: string[i])
            tf.summary.image("sample",
                             tf.expand_dims(
                                 tf.convert_to_tensor(np.array(sample)), 0),
                             step=epoch)
        checkpoint.step.assign_add(1)
        checkpoint_manager.save()
        # Metrics are streaming; reset so each epoch reports fresh values.
        train_loss.reset_states()
        train_accuracy.reset_states()
        test_loss.reset_states()
        test_accuracy.reset_states()
    model.save("model/final.hdf5", save_format="hdf5")
# Hold out 20% of the training data for validation.
x_train, x_val, t_train, t_val = \
    train_test_split(x_train, t_train, test_size=0.2)

'''
2. モデルの構築
'''
model = DNN(200, 10)

'''
3. モデルの学習
'''
criterion = losses.SparseCategoricalCrossentropy()
optimizer = optimizers.SGD(learning_rate=0.01, momentum=0.9)
# Streaming metrics for train and validation phases.
train_loss = metrics.Mean()
train_acc = metrics.SparseCategoricalAccuracy()
val_loss = metrics.Mean()
val_acc = metrics.SparseCategoricalAccuracy()


def compute_loss(t, y):
    # Sparse categorical cross-entropy between targets t and predictions y.
    return criterion(t, y)


def train_step(x, t):
    # One optimization step: forward pass under the tape, backprop, then
    # update the streaming train metrics.
    with tf.GradientTape() as tape:
        preds = model(x)
        loss = compute_loss(t, preds)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    train_loss(loss)
    train_acc(t, preds)
(x_train, y_train), (x_test, y_test) = datasets.mnist.load_data() x_train = x_train / np.float32(255) x_test = x_test / np.float32(255) train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)) return (train_dataset, (x_test, y_test)) train_data, (test_xs, test_ts) = mnist_dataset() train_data = train_data.shuffle(MNIST_TRAIN_COUNT).batch(BATCH_SIZE).repeat() train_data_iter = iter(train_data) model = MODELS[MODEL_ID]() optimizer = optimizers.Adam(learning_rate=0.0005) compute_loss = losses.SparseCategoricalCrossentropy() compute_accuracy = metrics.SparseCategoricalAccuracy() for epoch in count(1): for _ in range(BATCHES_PER_EPOCH): batch_xs, batch_ts = next(train_data_iter) with tf.GradientTape() as tape: outputs = model(batch_xs, training=True) loss = compute_loss(batch_ts, outputs) grads = tape.gradient(loss, model.trainable_variables) optimizer.apply_gradients(zip(grads, model.trainable_variables)) outputs = model(test_xs, training=False) loss = compute_loss(test_ts, outputs) accuracy = compute_accuracy(test_ts, outputs) print("Epoch: {}, Loss: {:.6f}, Accuracy: {:.6f}".format( epoch, loss * 10000, accuracy))
x = self.d1(x) x = self.d2(x) x = self.d3(x) return self.d4(x) # 예측치(확률) 반환 # 단계4. loss function : losses 모듈 대체 loss = losses.SparseCategoricalCrossentropy(from_logits=True) # 단계5. model & optimizer model = Model() optimizer = optimizers.Adam() # lr : 자동설정(lr=0.1) # 단계6. model test : 1epoch -> train/test loss and accuracy 측정 train_loss = metrics.Mean() # 전체 원소 -> 평균 객체 반환 train_acc = metrics.SparseCategoricalAccuracy() # 분류정확도 객체 반환 test_loss = metrics.Mean() test_acc = metrics.SparseCategoricalAccuracy() # 단계7. 역방향 step : tf.GradientTape 클래스 이용 @tf.function # 연산 속도 향상 def train_step(images, labels): # train step with tf.GradientTape() as tape: # 1) 순방향 preds = model(images, training=True) # 예측치 loss_value = loss(labels, preds) # 손실값 # 2) 역방향 grads = tape.gradient(loss_value, model.trainable_variables)
layers.experimental.preprocessing.Rescaling(1.0 / 255), layers.Conv2D( filters=32, kernel_size=(4, 4), activation="relu", input_shape=(IMAGE_SIZE, IMAGE_SIZE), ), layers.MaxPooling2D(pool_size=(2, 2)), layers.Flatten(), layers.Dense(1024, activation="relu"), layers.Dense(101, activation=None), ]) model.compile( optimizer=optimizers.Adam(), loss=losses.SparseCategoricalCrossentropy(from_logits=True), metrics=[metrics.SparseCategoricalAccuracy()], ) model.fit( train, epochs=5, validation_data=validation, ) # Epoch 1/5 # 2021-02-28 12:40:56.219529: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2) # 74/74 [==============================] - 41s 529ms/step - loss: 4.5479 - sparse_categorical_accuracy: 0.0258 - val_loss: 4.3710 - val_sparse_categorical_accuracy: 0.0483 # Epoch 2/5 # 74/74 [==============================] - 15s 201ms/step - loss: 4.3756 - sparse_categorical_accuracy: 0.0495 - val_loss: 4.3269 - val_sparse_categorical_accuracy: 0.0564 # Epoch 3/5 # 74/74 [==============================] - 15s 201ms/step - loss: 4.3163 - sparse_categorical_accuracy: 0.0579 - val_loss: 4.2965 - val_sparse_categorical_accuracy: 0.0608 # Epoch 4/5
print("") # train_model(model, ds_train, ds_test, 10) # 自定义训练循环 #自定义训练循环无需编译模型,直接利用优化器根据损失函数反向传播迭代参数,拥有最高的灵活性。 tf.keras.backend.clear_session() optimizer = optimizers.Nadam() loss_func = losses.SparseCategoricalCrossentropy() train_loss = metrics.Mean(name='train_loss') train_metric = metrics.SparseCategoricalAccuracy(name="train_accuracy") valid_loss = metrics.Mean(name='valid_loss') valid_metric = metrics.SparseCategoricalAccuracy(name='valid_accuracy') @tf.function def train_step(model, features, labels): with tf.GradientTape() as tape: predictions = model(features, training=True) loss = loss_func(labels, predictions) gradients = tape.gradient(loss, model.trainable_variables) optimizer.apply_gradients(zip(gradients, model.trainable_variables)) train_loss.update_state(loss) train_metric.update_state(labels, predictions)
inputs = [input_ids, attention_mask, token_type_ids] # bert x = bert.layers[0](inputs) # only use pooled_output, x: sequence_output, pooled_output out = x[1] # fc layer(add layers for transfer learning) out = Dropout(0.25)(out) out = Dense(128, activation='relu')(out) out = Dropout(0.5)(out) out = Dense(num_classes, activation='softmax')(out) return Model(inputs=inputs, outputs=out) #%% """ 4.データのロード、モデルのコンパイル、モデルの学習実施 """ model = make_model(bert, num_classes , max_length) ## Load dataset, tokenizer, model from pretrained vocabulary train_dataset, valid_dataset = load_dataset(data_no_wakati, tokenizer, max_length=max_length, train_batch=train_batch, val_batch=val_batch) ## Prepare training: Compile tf.keras model with optimizer, loss and learning rate schedule optimizer = optimizers.Adam() loss = losses.SparseCategoricalCrossentropy() metric = metrics.SparseCategoricalAccuracy('accuracy') model.compile(optimizer=optimizer, loss=loss, metrics=[metric]) # Train and evaluate using tf.keras.Model.fit() history = model.fit(train_dataset, epochs=epochs, validation_data=valid_dataset) # %%
def build_model_1(self, verbose=False):
    """Build and compile the transformer-based classifier.

    Depending on self.model_type, loads a frozen BERT or RoBERTa backbone
    from local pretrained files, stacks the head selected by self.version
    (v1..v4) on top, and compiles the resulting Keras model.

    Args:
        verbose: when True, print the model summary.

    Returns:
        The compiled tf.keras Model.
    """
    if self.model_type in TrainModelConfigV2.BERT_LIST:
        config = BertConfig.from_pretrained(
            "{}{}/config.json".format(PATH_TRANS_INPUT, self.model_type),
            num_labels=BertBaseUnCaseV2.N_CLASS)
        bert_model = TFBertModel.from_pretrained("{}{}/tf_model.h5".format(
            PATH_TRANS_INPUT, self.model_type),
                                                 config=config)
        bert_model.trainable = False  # backbone frozen; only the head trains
        input_ids_layer = Input(shape=(BertBaseUnCaseV2.MAXLEN, ),
                                dtype=np.int32,
                                name='input_ids')
        input_mask_layer = Input(shape=(BertBaseUnCaseV2.MAXLEN, ),
                                 dtype=np.int32,
                                 name='attention_mask')
        input_token_type_layer = Input(shape=(BertBaseUnCaseV2.MAXLEN, ),
                                       dtype=np.int32,
                                       name='token_type_ids')
        input_layer_list = [
            input_ids_layer, input_mask_layer, input_token_type_layer
        ]
        bert_layer = bert_model(input_layer_list)[0]
    elif self.model_type in TrainModelConfigV2.ROBERTA_LIST:
        config = RobertaConfig.from_pretrained(
            "{}{}/config.json".format(PATH_TRANS_INPUT, self.model_type),
            num_labels=BertBaseUnCaseV2.N_CLASS)
        bert_model = TFRobertaModel.from_pretrained(
            "{}{}/tf_model.h5".format(PATH_TRANS_INPUT, self.model_type),
            config=config)
        bert_model.trainable = False
        # RoBERTa takes no token_type_ids, hence only two inputs.
        input_ids_layer = Input(shape=(BertBaseUnCaseV2.MAXLEN, ),
                                dtype=np.int32,
                                name='input_ids')
        input_mask_layer = Input(shape=(BertBaseUnCaseV2.MAXLEN, ),
                                 dtype=np.int32,
                                 name='attention_mask')
        input_layer_list = [input_ids_layer, input_mask_layer]
        bert_layer = bert_model(input_layer_list)[0]
    # NOTE(review): if model_type is in neither list, input_layer_list and
    # bert_layer are unbound and the code below raises NameError — confirm
    # callers always pass a known model_type.
    if self.version == "v1":
        flat_layer = Flatten()(bert_layer)
        out = Dropout(0.2)(flat_layer)
    elif self.version == "v2":
        out = LSTM(BertBaseUnCaseV2.hidden_size, dropout=0.2)(bert_layer)
    elif self.version == "v3":
        flat_layer = Flatten()(bert_layer)
        dense_layer = Dense(BertBaseUnCaseV2.hidden_size,
                            activation='relu')(flat_layer)
        out = Dropout(0.2)(dense_layer)
    elif self.version == "v4":
        bi_layer = Bidirectional(
            LSTM(BertBaseUnCaseV2.hidden_size,
                 dropout=0.2,
                 return_sequences=True))(bert_layer)
        bi_layer = Bidirectional(LSTM(
            BertBaseUnCaseV2.hidden_size))(bi_layer)
        dropout_layer = Dropout(0.2)(bi_layer)
        out = Dense(256, activation='relu')(dropout_layer)
    if BertBaseUnCaseV2.VER == 'v5':
        dense_output = Dense(BertBaseUnCaseV2.N_CLASS,
                             activation='sigmoid')(out)
    else:
        dense_output = Dense(BertBaseUnCaseV2.N_CLASS,
                             activation='softmax')(out)
    model = Model(inputs=input_layer_list, outputs=dense_output)

    # compile and fit
    # FIX: the original if/else on BertBaseUnCaseV2.VER == 'v5' selected a
    # byte-identical optimizer/loss/metric in both branches — collapsed to a
    # single path (no behavior change).
    optimizer = optimizers.Adam(learning_rate=BertBaseUnCaseV2.lr)
    # NOTE(review): from_logits=True although the output layer already
    # applies sigmoid/softmax, so the loss treats probabilities as logits.
    # Preserved as-is; confirm intent before changing.
    loss = losses.SparseCategoricalCrossentropy(from_logits=True)
    metric = metrics.SparseCategoricalAccuracy('accuracy')
    model.compile(optimizer=optimizer, loss=loss, metrics=[metric])
    if verbose:
        model.summary()
    return model
def compile_with_AdamSparseCategoricalCrossentropySparseCategoricalAccuracy(self):
    """Compile with Adam, from-logits sparse CE, and accuracy.

    Returns self so the call can be chained fluently.
    """
    chosen_loss = losses.SparseCategoricalCrossentropy(from_logits=True)
    chosen_metrics = [metrics.SparseCategoricalAccuracy()]
    self.compile(optimizers.Adam(),
                 loss=chosen_loss,
                 metrics=chosen_metrics)
    return self