def __init__(self, args, data_loader, valid_loader=None):
    self.data_loader = data_loader
    self.valid_loader = valid_loader
    self.mode = args.mode
    self.batch_size = args.batch_size
    self.mixed_training = args.mixed_training
    self.n_epochs = args.n_epochs
    self.save_dir = args.save_dir
    if args.mixed_training:
        # Compute in float16 to speed up training.
        # Configured through the Keras mixed_precision policy.
        policy = mixed_precision.Policy('mixed_float16')
        mixed_precision.set_policy(policy)
    self.n_classes = len(np.unique(data_loader.y_train))
    # The dataset was switched from CIFAR-10 to MNIST, so the model is now
    # built as follows instead of:
    # self.model = get_model((None, None, 3), self.n_classes)
    w = data_loader.x_test.shape[1]
    h = data_loader.x_test.shape[2]
    self.model = get_model((w, h, 1), 10)
    print("model input : " + str(self.model.input))
    print("model output : " + str(self.model.output))
    self.model.compile(loss=[
        losses.SparseCategoricalCrossentropy(),
        losses.SparseCategoricalCrossentropy()
    ],
                       optimizer=optimizers.Adam(learning_rate=args.lr),
                       metrics=['acc'])
def newModel(self) -> tf.keras.Model:
    if self.modelstr == "MobileNetV2":
        base_model = MobileNetV2(weights='imagenet',
                                 include_top=False,
                                 input_shape=(224, 224, 3))
        base_model.trainable = True

        inputs = tf.keras.Input(shape=(224, 224, 3), name="image_input")
        x = base_model(inputs, training=True)
        x = GlobalAveragePooling2D()(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        x = Dense(1024, activation='relu')(x)
        x = Dense(1024, activation='relu')(x)
        x = Dense(512, activation='relu')(x)
        x = Dense(128, activation='relu')(x)
        preds = Dense(3, activation='softmax')(x)

        self.model = tf.keras.Model(inputs=inputs,
                                    outputs=[preds],
                                    name="mobileNetV2")
        self.model.compile(
            loss=losses.SparseCategoricalCrossentropy(from_logits=False),
            optimizer=optimizers.Adam(learning_rate=1e-3),
            metrics=['accuracy'])
    else:  # EfficientNet
        def unfreeze_model(model):
            # Unfreeze the top 20 layers while leaving BatchNorm layers frozen.
            for layer in model.layers[-20:]:
                if not isinstance(layer, layers.BatchNormalization):
                    layer.trainable = True

        inputs = tf.keras.Input(shape=(224, 224, 3), name="image_input")
        conv_base = EfficientNetB0(input_shape=(224, 224, 3),
                                   input_tensor=inputs,
                                   drop_connect_rate=0.4,
                                   include_top=False)
        conv_base.trainable = False
        unfreeze_model(conv_base)

        x = GlobalAveragePooling2D()(conv_base.output)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        x = Dense(512, activation='relu')(x)
        x = Dense(256, activation='relu')(x)
        x = Dense(128, activation='relu')(x)
        preds = Dense(3, activation='softmax')(x)

        self.model = tf.keras.Model(inputs=inputs,
                                    outputs=[preds],
                                    name="efficientNetB0")
        self.model.compile(
            loss=losses.SparseCategoricalCrossentropy(from_logits=False),
            optimizer=optimizers.Adam(learning_rate=1e-3),
            metrics=['accuracy'])
    return self.model
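# Hedged usage sketch for newModel above (the enclosing class and its datasets
# are not shown here, so the names below are hypothetical placeholders). The
# compiled model expects 224x224x3 images with integer labels in {0, 1, 2},
# since it ends in a 3-way softmax trained with SparseCategoricalCrossentropy.
# builder.modelstr = "MobileNetV2"   # any other value selects EfficientNetB0
# model = builder.newModel()
# model.fit(train_ds, validation_data=val_ds, epochs=10)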
def actor_loss(self, acts_and_advs, logits):
    actions, advantages = tf.split(acts_and_advs, 2, axis=-1)
    weighted_sparse_ce = kls.SparseCategoricalCrossentropy(from_logits=True)
    actions = tf.cast(actions, tf.int32)
    policy_loss = weighted_sparse_ce(actions, logits, sample_weight=advantages)
    return policy_loss
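# Hedged usage sketch (not from the source): actor_loss above expects the
# actions and advantages packed into a single y_true tensor so both fit
# through the Keras (y_true, y_pred) loss signature. Names are illustrative.
import numpy as np

actions = np.array([0, 2, 1], dtype=np.float32)            # sampled action indices
advantages = np.array([0.5, -1.2, 0.8], dtype=np.float32)  # estimated advantages
acts_and_advs = np.concatenate(
    [actions[:, None], advantages[:, None]],
    axis=-1)  # shape (batch, 2); split again on axis=-1 inside the loss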
def train():
    num_words = 20000
    sequence_length = 100
    depth = 6
    filters = 64
    channels = 128
    block_filters = [filters] * depth
    num_classes = 2

    inputs = layers.Input(shape=(sequence_length, ), name="inputs")
    x = layers.Embedding(num_words, channels)(inputs)
    x = tcn.TCN(block_filters, kernel_size=8)(x)
    outputs = layers.Dense(num_classes, activation="softmax", name="output")(x)

    model = Model(inputs, outputs)
    model.compile(optimizer="Adam",
                  metrics=[metrics.SparseCategoricalAccuracy()],
                  loss=losses.SparseCategoricalCrossentropy())
    print(model.summary())

    train_dataset, test_dataset = load_dataset(num_words, sequence_length)
    model.fit(train_dataset.batch(32),
              validation_data=test_dataset.batch(32),
              callbacks=[
                  TensorBoard(
                      str(
                          Path("logs") /
                          datetime.now().strftime("%Y-%m-%dT%H-%M_%S")))
              ],
              epochs=5)
def create_model(lr=0.0004, beta1=0.75, beta2=0.95, dropout=0.4):
    model = models.Sequential()
    model.add(
        layers.Conv2D(32, (3, 3),
                      activation='relu',
                      input_shape=(28, 28, 1),
                      padding="same"))
    model.add(layers.Conv2D(64, (3, 3), activation='relu', padding="same"))
    model.add(layers.MaxPooling2D((2, 2), 2))
    model.add(layers.Dropout(dropout))
    model.add(layers.Conv2D(64, (3, 3), activation='relu', padding="same"))
    model.add(layers.Conv2D(128, (3, 3), activation='relu', padding="same"))
    model.add(layers.MaxPooling2D((2, 2), 2))
    model.add(layers.Dropout(dropout))
    model.add(layers.Flatten())
    model.add(layers.Dense(256, activation='relu'))
    model.add(layers.Dense(10))

    adam = optimizers.Adam(lr, beta1, beta2)
    model.compile(optimizer=adam,
                  loss=losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])
    return model
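# Hedged usage sketch (not part of the source): create_model above expects
# 28x28x1 inputs with integer labels and emits logits, so MNIST is used here
# purely for illustration.
import tensorflow as tf

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = x_train[..., None].astype("float32") / 255.0
x_test = x_test[..., None].astype("float32") / 255.0

model = create_model(lr=0.0004, beta1=0.75, beta2=0.95, dropout=0.4)
model.fit(x_train, y_train, epochs=5, batch_size=64, validation_split=0.1)
model.evaluate(x_test, y_test)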
def actor_loss(self, states, actions, values, rewards, next_values, dones):
    policy = self.actor_model(
        tf.convert_to_tensor(np.vstack(states), dtype=tf.float32))

    advantages = []
    for i in range(len(states)):
        reward = np.array(rewards[i])
        value = np.array(values[i])
        next_value = np.array(next_values[i])
        if dones[i]:
            advantages.append(reward - value)
        else:
            advantages.append(reward + self.df * next_value - value)

    advantages = tf.reshape(advantages, [len(states)])
    advantages = tf.convert_to_tensor(advantages, dtype=tf.float32)

    # Entropy of the current policy (cross-entropy of the policy with itself).
    entropy = losses.categorical_crossentropy(policy, policy, from_logits=True)

    # SparseCategoricalCrossentropy gives -log pi(a|s) for the taken actions.
    ce_loss = losses.SparseCategoricalCrossentropy(from_logits=True)
    # policy_loss = ce_loss(actions, policy, sample_weight=np.array(advantages))
    # Equivalent formulation:
    log_pi = ce_loss(actions, policy)
    policy_loss = log_pi * np.array(advantages)
    policy_loss = tf.reduce_mean(policy_loss)
    log_pi = tf.reduce_mean(log_pi)

    return policy_loss - self.en * entropy, log_pi
def setUp(self): """Setup shared by all tests""" self.scce = losses.SparseCategoricalCrossentropy() self.bscce_equal = BalancedSCCE([1, 1, 1]) self.class_weights = [0.2, 0.3, 0.5] self.bscce_unequal = BalancedSCCE(self.class_weights)
def train():
    depth = 6
    filters = 25
    block_filters = [filters] * depth
    sequence_length = 601

    train_dataset, test_dataset = load_dataset(30000, sequence_length)

    model = tcn.build_model(sequence_length=sequence_length,
                            channels=1,
                            num_classes=10,
                            filters=block_filters,
                            kernel_size=8,
                            return_sequence=True)
    model.compile(optimizer=optimizers.RMSprop(learning_rate=5e-4, clipnorm=1.),
                  metrics=[metrics.SparseCategoricalAccuracy()],
                  loss=losses.SparseCategoricalCrossentropy())
    print(model.summary())

    model.fit(train_dataset.batch(32),
              validation_data=test_dataset.batch(32),
              callbacks=[
                  TensorBoard(
                      str(
                          Path("logs") /
                          datetime.now().strftime("%Y-%m-%dT%H-%M_%S")))
              ],
              epochs=10)
def __init__(self,
             policymodel,
             valuemodel,
             data_sz=256,
             batch_sz=80,
             lr=0.000085,
             entropy_const=1e-6,
             epochs=20):
    # self.model = model
    self.policymodel = policymodel
    self.valuemodel = valuemodel
    self.policymodel.compile(
        optimizer=tf.keras.optimizers.RMSprop(learning_rate=lr),
        loss=[self._logits_loss])
    self.valuemodel.compile(
        optimizer=tf.keras.optimizers.RMSprop(learning_rate=lr),
        loss=[self._value_loss])
    self.gamma = 1  # discount factor
    self.data_sz = data_sz
    self.batch_sz = batch_sz  # batch size
    self.epochs = epochs
    self.entropy_const = entropy_const  # weight of the entropy-maximization term in the logits loss
    # TensorFlow's built-in sparse CE converts logits to (negative) log probabilities.
    self.logit2logprob = kls.SparseCategoricalCrossentropy(from_logits=True)
def action_loss(self, actions, advantages, policy_prediction):
    actions = tf.cast(actions, tf.int32)
    policy_loss = kls.SparseCategoricalCrossentropy(from_logits=True)(
        actions, policy_prediction, sample_weight=advantages)
    policy_2 = tf.nn.softmax(policy_prediction)
    entropy_loss = kls.categorical_crossentropy(policy_2, policy_2)
    return policy_loss - self.params['entropy'] * entropy_loss
def test_logits_2d(self):
    """Testing logits for 2D data"""
    y_true = [[1, 2], [0, 2]]
    y_pred = [[[0.05, 0.95, 0], [0.1, 0.8, 0.1]],
              [[0.1, 0.2, 0.7], [0.3, 0.5, 0.2]]]

    self.scce = losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction=losses.Reduction.NONE)
    scce = self.scce(y_true, y_pred).numpy()

    self.sfl1 = SparseFocalLoss(gamma=self.gamma1, from_logits=True)
    self.sfl2 = SparseFocalLoss(gamma=self.gamma2, from_logits=True)

    scce1 = scce * (
        1 - np.where(get_one_hot(y_true), softmax(y_pred), 0).sum(axis=-1)
    )**self.gamma1
    scce1 = scce1.mean()
    sfl1 = self.sfl1(y_true, y_pred).numpy()
    np.testing.assert_allclose(scce1, sfl1, rtol=1e-7)

    scce2 = scce * (
        1 - np.where(get_one_hot(y_true), softmax(y_pred), 0).sum(axis=-1)
    )**self.gamma2
    scce2 = scce2.mean()
    sfl2 = self.sfl2(y_true, y_pred).numpy()
    np.testing.assert_allclose(scce2, sfl2, rtol=1e-6)
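# Hedged helper sketch: get_one_hot and softmax used by the test above are not
# shown in this snippet. Minimal NumPy versions consistent with that usage
# might look like the following (assumptions, not the project's actual helpers).
import numpy as np

def softmax(x, axis=-1):
    x = np.asarray(x, dtype=np.float64)
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

def get_one_hot(y_true, num_classes=3):
    # Boolean one-hot mask used to pick out the predicted probability of the true class.
    return np.eye(num_classes, dtype=bool)[np.asarray(y_true)]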
def train():
    depth = 6
    filters = 25
    block_filters = [filters] * depth

    model = tcn.build_model(sequence_length=28 * 28,
                            channels=1,
                            num_classes=10,
                            filters=block_filters,
                            kernel_size=8)
    model.compile(optimizer="Adam",
                  metrics=[metrics.SparseCategoricalAccuracy()],
                  loss=losses.SparseCategoricalCrossentropy())
    print(model.summary())

    train_dataset, test_dataset = load_dataset()
    model.fit(train_dataset.batch(32),
              validation_data=test_dataset.batch(32),
              callbacks=[
                  TensorBoard(
                      str(
                          Path("logs") /
                          datetime.now().strftime("%Y-%m-%dT%H-%M_%S")))
              ],
              epochs=10)
def train_model():
    model = build_model()
    print(model.summary())

    optimizer = optimizers.Adam(learning_rate=0.001)
    loss = losses.SparseCategoricalCrossentropy(from_logits=True)
    model.compile(optimizer=optimizer, loss=loss, metrics=['acc'])

    (x_train, y_train), (x_test, y_test) = load_preprocess_data()

    epochs = 10
    n_train = 60000
    n_test = 10000
    batch_size = 32
    steps_per_epoch = n_train // batch_size
    validation_steps = n_test // batch_size

    train_data_set = convert_to_data_set(x_train,
                                         y_train,
                                         repeat_times=epochs,
                                         shuffle_buffer_size=n_train,
                                         batch_size=batch_size)
    val_data_set = convert_to_data_set(x_test,
                                       y_test,
                                       repeat_times=epochs,
                                       shuffle_buffer_size=n_test,
                                       batch_size=batch_size)

    my_callbacks = []
    early_stopping_cb = callbacks.EarlyStopping(monitor='val_loss',
                                                patience=5,
                                                restore_best_weights=True)
    my_callbacks.append(early_stopping_cb)
    tensorboard_cb = callbacks.TensorBoard(log_dir='logs')
    my_callbacks.append(tensorboard_cb)
    checkpoint_path = 'models/base_cnn/ckpt'
    checkpoint_cb = callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                              save_weights_only=True,
                                              save_best_only=True)
    my_callbacks.append(checkpoint_cb)

    history = model.fit(train_data_set,
                        epochs=epochs,
                        steps_per_epoch=steps_per_epoch,
                        validation_data=val_data_set,
                        validation_steps=validation_steps,
                        callbacks=my_callbacks)

    print('\n\n')
    train_result = model.evaluate(x_train, y_train)
    format_result(train_result, name='train')
    val_result = model.evaluate(x_test, y_test)
    format_result(val_result, name='val')

    return history
def CNN(self, model):
    model.add(
        layers.Conv2D(filters=32,
                      kernel_size=(3, 3),
                      activation="relu",
                      input_shape=(28, 28, 1)))
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(
        layers.Conv2D(filters=64, kernel_size=(3, 3), activation="relu"))
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(layers.Flatten())
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(128, activation="relu"))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(64, activation="relu"))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(10))
    model.summary()

    model.compile(
        optimizer="adam",
        loss=losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['acc'])
    print("Optimizer: ", model.optimizer)
def _actor_loss(self, acts_and_advs, actor_logits):
    """Custom loss function for the actor.

    For an explanation of how tf/keras calls it, see the critic loss above
    and the reference. y_true (the targets) holds the actions and advantages;
    y_pred (the policy) is the unnormalized logits output of the actor network.
    """
    actions, advantages = tf.split(acts_and_advs, 2, axis=-1)
    # Sparse categorical CE loss object that supports the sample_weight arg on call().
    # The from_logits argument ensures transformation into normalized probabilities.
    weighted_sparse_ce = kls.SparseCategoricalCrossentropy(from_logits=True)
    # The policy loss is given by policy gradients, weighted by the advantages.
    # Note: we only calculate the loss on the actions we have actually taken;
    # this, and the fact that A2C is an on-policy method, is why policy
    # gradients usually require many episodes to converge.
    actions = tf.cast(actions, tf.int32)
    policy_loss = weighted_sparse_ce(actions,
                                     actor_logits,
                                     sample_weight=advantages)
    # The entropy loss can be calculated as the cross-entropy of the logits with themselves.
    entropy_loss = kls.categorical_crossentropy(actor_logits,
                                                actor_logits,
                                                from_logits=True)
    # Signs are flipped because the optimizer minimizes.
    return policy_loss - self.ENTROPY_FACTOR * entropy_loss
def __init__(self, args, config_path, database, network_cls):
    self.config = parse_config(config_path)
    config = self.config
    self.args = args
    self.database = database
    self.network_cls = network_cls

    # Initialize an optimizer and a loss function.
    if self.config["clip_norm"] == .0:
        opt_dict = {"learning_rate": self.config["lr"]}
    elif self.config["clip_norm"] > .0:
        opt_dict = {"learning_rate": self.config["lr"],
                    "clipnorm": self.config["clip_norm"]}
    else:
        raise ValueError("clip_norm must be 0 (no clipping) or a positive float")
    self.optimizer = getattr(optimizers, args.optimizer)(**opt_dict)

    if self.args.binary:
        self.loss_func = losses.BinaryCrossentropy(from_logits=True)
    else:
        self.loss_func = losses.SparseCategoricalCrossentropy(from_logits=True)

    self.ckpt_cb = tf.keras.callbacks.ModelCheckpoint(
        os.path.join(config["model_path"], config["log_path"],
                     config["ckpt_path"], f"{args.encoder_name}.ckpt"))
    self.csv_log_cb = tf.keras.callbacks.CSVLogger(
        os.path.join(config["model_path"], config["log_path"],
                     config["csv_path"], "log.csv"))
    self.tb_cb = tf.keras.callbacks.TensorBoard(
        os.path.join(config["model_path"], config["log_path"],
                     config["tb_path"]))
def train():
    unet_model = unet.build_model(*oxford_iiit_pet.IMAGE_SIZE,
                                  channels=oxford_iiit_pet.channels,
                                  num_classes=oxford_iiit_pet.classes,
                                  layer_depth=4,
                                  filters_root=64,
                                  padding="same")
    unet.finalize_model(unet_model,
                        loss=losses.SparseCategoricalCrossentropy(),
                        metrics=[metrics.SparseCategoricalAccuracy()],
                        auc=False,
                        learning_rate=LEARNING_RATE)

    trainer = unet.Trainer(name="oxford_iiit_pet")
    train_dataset, validation_dataset = oxford_iiit_pet.load_data()
    trainer.fit(unet_model,
                train_dataset,
                validation_dataset,
                epochs=25,
                batch_size=1)

    return unet_model
def _logits_loss(self, acts_and_advs, logits):
    # A trick to input actions and advantages through the same API.
    actions, advantages = tf.split(acts_and_advs, 2, axis=-1)
    # Sparse categorical CE loss object that supports the sample_weight arg on call().
    # The from_logits argument ensures transformation into normalized probabilities.
    weighted_sparse_ce = kls.SparseCategoricalCrossentropy(from_logits=True)
    # The policy loss is given by policy gradients, weighted by the advantages.
    # Note: we only calculate the loss on the actions we have actually taken.
    actions = tf.cast(actions, tf.int32)
    policy_loss = weighted_sparse_ce(actions, logits, sample_weight=advantages)
    # The entropy loss can be calculated as the cross-entropy of the logits with themselves.
    entropy_loss = kls.categorical_crossentropy(logits, logits, from_logits=True)
    # Signs are flipped because the optimizer minimizes.
    return policy_loss - self.params['entropy'] * entropy_loss
def _logits_loss(self, actions_and_advantages, logits):
    # A trick to input actions and advantages through the same API.
    actions, advantages = tf.split(actions_and_advantages, 2, axis=-1)
    # Sparse categorical CE loss obj that supports sample_weight arg on `call()`.
    # `from_logits` argument ensures transformation into normalized probabilities.
    weighted_sparse_ce = kls.SparseCategoricalCrossentropy(from_logits=True)
    # Policy loss is defined by policy gradients, weighted by advantages.
    # Note: we only calculate the loss on the actions we've actually taken.
    actions = tf.cast(actions, tf.int32)
    # NOT SURE, but this was meant to ignore negative advantages, which cause a negative loss:
    # adv2 = tf.where(advantages > 0, advantages, 0)
    policy_loss = weighted_sparse_ce(actions, logits, sample_weight=advantages)
    # policy_loss = weighted_sparse_ce(actions, logits, sample_weight=adv2)
    # Entropy loss can be calculated as cross-entropy over itself.
    probs = tf.nn.softmax(logits)
    entropy_loss = kls.categorical_crossentropy(probs, probs)
    # We want to minimize the policy loss and maximize entropy.
    # Signs are flipped because the optimizer minimizes.
    policy_loss2 = tf.where(policy_loss > 0, policy_loss,
                            tf.math.maximum(policy_loss, -10))
    # return policy_loss - self.entropy_c * entropy_loss
    return policy_loss2 - self.entropy_c * entropy_loss
def __init__(self, model):
    self.model = model
    self.gamma = .99  # discount factor
    learning_rate = 3e-8  # note: defined but not used below; the optimizer is compiled with 7e-3
    self.model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=7e-3),
                       loss=[self._logits_loss, self._value_loss])
    # The learning rate is kept small because the rewards are pretty big; try changing it.
    self.logitloss = kls.SparseCategoricalCrossentropy(from_logits=True)
def train_step(model, optim, X, Y):
    with tf.GradientTape() as tape:
        Y_cap = model(X, training=True)
        loss = losses.SparseCategoricalCrossentropy()(Y, Y_cap)
    variables = model.trainable_variables
    gradients = tape.gradient(loss, variables)
    optim.apply_gradients(zip(gradients, variables))
    return loss, Y_cap
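# Hedged usage sketch (not from the source): drives train_step above with a
# tiny model and synthetic data purely for illustration. The model ends in a
# softmax because the loss above uses the default from_logits=False.
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, optimizers

model = tf.keras.Sequential([
    layers.Dense(16, activation="relu"),
    layers.Dense(3, activation="softmax"),
])
optim = optimizers.Adam(learning_rate=1e-3)

X = np.random.rand(32, 8).astype("float32")               # synthetic features
Y = np.random.randint(0, 3, size=(32,)).astype("int32")   # synthetic integer labels
loss, Y_cap = train_step(model, optim, X, Y)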
def setUp(self): """Setup shared by all tests""" self.scce = losses.SparseCategoricalCrossentropy( reduction=losses.Reduction.NONE) self.gamma1 = 2.0 self.gamma2 = 4.0 self.sfl1 = SparseFocalLoss(gamma=self.gamma1) self.sfl2 = SparseFocalLoss(gamma=self.gamma2)
def __init__(self, max_len):
    self.model_name = 'distilbert-base-uncased'
    self.max_len = max_len
    self.tkzr = DistilBertTokenizer.from_pretrained(self.model_name)
    self.model = TFDistilBertForSequenceClassification.from_pretrained(
        self.model_name)
    self.optimizer = optimizers.Adam(learning_rate=3e-5)
    self.loss = losses.SparseCategoricalCrossentropy(from_logits=True)
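# Hedged usage sketch (not from the source; the enclosing class is not shown,
# so "clf" is a hypothetical instance of it). The tokenizer output feeds the
# TF model, which returns logits, matching from_logits=True above.
texts = ["an example sentence"]
enc = clf.tkzr(texts,
               truncation=True,
               padding="max_length",
               max_length=clf.max_len,
               return_tensors="tf")
clf.model.compile(optimizer=clf.optimizer, loss=clf.loss, metrics=["accuracy"])
outputs = clf.model(enc)  # outputs.logits has shape (batch_size, num_labels)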
def compile_model(model):
    model.compile(optimizer=optimizers.Nadam(),
                  loss=losses.SparseCategoricalCrossentropy(),
                  metrics=[
                      metrics.SparseCategoricalAccuracy(),
                      metrics.SparseTopKCategoricalAccuracy(5)
                  ])
    return model
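# Hedged usage sketch (the model definition is illustrative, not from the
# source): compile_model above expects a model whose output is a probability
# distribution, since the loss uses the default from_logits=False.
import tensorflow as tf
from tensorflow.keras import layers

model = tf.keras.Sequential([
    layers.Dense(64, activation="relu", input_shape=(20,)),
    layers.Dense(10, activation="softmax"),
])
model = compile_model(model)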
def preprocessing(self, seq_length, embedding_dim, units, dropout_rate,
                  batch_size):
    """
    Parameters
    ----------
    seq_length : int
        Input length in the network.
    embedding_dim : int
        Embedding dimension.
    units : int
        Dimensionality of the LSTM output space.
    dropout_rate : float
        Dropout rate between 0 and 1.
    batch_size : int
        Batch size.
    """
    self.seq_length = seq_length
    self.embedding_dim = embedding_dim
    self.units = units
    self.dropout_rate = dropout_rate
    self.batch_size = batch_size

    # Extract all sentences from the json files.
    list_json = glob.glob(self.input_folder + '\\*.json')
    dic_senders = preprocess(list_json)
    all_sentences = ''
    for key in dic_senders:  # iterate over all senders
        sentences_with_sender = [
            key + ' : ' + s + '\n' for s in dic_senders[key]
        ]
        for s in sentences_with_sender:  # and concatenate their messages
            all_sentences += s

    vocab = sorted(set(all_sentences))
    # Create a mapping from unique characters to indices.
    self.__char2idx = {u: i for i, u in enumerate(vocab)}
    self.__idx2char = np.array(vocab)
    self.nb_char = len(vocab)
    print('{} unique characters'.format(self.nb_char))

    # Create the dataset for training.
    text_as_int = np.array([self.__char2idx[c] for c in all_sentences])
    dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
    sequences = dataset.batch(self.seq_length + 1, drop_remainder=True)
    dataset = sequences.map(self.__split_input_target)
    self.dataset = dataset.shuffle(self.BUFFER_SIZE).batch(
        self.batch_size, drop_remainder=True)

    # Create the model.
    self.model = self.__create_model(embedding_dim, units, dropout_rate,
                                     self.batch_size)
    self.model.summary()
    self.model.compile(
        optimizer='adam',
        loss=losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy'])
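# Hedged helper sketch (assumption, not the project's code): the
# __split_input_target method mapped over `sequences` above most likely pairs
# each character sequence with the same sequence shifted by one, as in the
# standard character-level language-model setup.
def split_input_target(chunk):
    input_text = chunk[:-1]   # all characters except the last
    target_text = chunk[1:]   # the same characters shifted one step ahead
    return input_text, target_text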
def compile(self, learning_rate):
    '''Compiles this model.'''
    optimizer = optimizers.Adam(learning_rate=learning_rate)
    loss = losses.SparseCategoricalCrossentropy(from_logits=True)
    super().compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
def model_fn():
    # We _must_ create a new model here, and _not_ capture it from an external
    # scope. TFF will call this within different graph contexts.
    keras_model = create_keras_model()
    return tff.learning.from_keras_model(
        keras_model,
        input_spec=preprocessed_sample_dataset.element_spec,
        loss=losses.SparseCategoricalCrossentropy(),
        metrics=[metrics.SparseCategoricalAccuracy()])
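# Hedged usage sketch: model_fn above is typically handed to a federated
# averaging builder. The builder name below matches older TFF releases that
# ship tff.learning.from_keras_model; treat it as illustrative rather than
# this project's actual training code.
iterative_process = tff.learning.build_federated_averaging_process(
    model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.02))
state = iterative_process.initialize()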
def test_equal_weights_logits_1d(self):
    """Testing equal weights logits for 1D data"""
    y_true = [1, 2]
    y_pred = [[-0.05, 0.3, 0.19], [0.2, -0.4, 0.12]]
    self.scce = losses.SparseCategoricalCrossentropy(from_logits=True)
    self.bscce_equal = BalancedSCCE([1, 1, 1], from_logits=True)
    scce = self.scce(y_true, y_pred).numpy()
    bscce = self.bscce_equal(y_true, y_pred).numpy()
    np.testing.assert_array_equal(scce, bscce)
def setUp(self): """Setup shared by all tests""" self.scce = losses.SparseCategoricalCrossentropy( reduction=losses.Reduction.NONE) self.gamma = 4.0 self.sfl_equal = BalancedSparseFocalLoss([1, 1, 1], gamma=self.gamma) self.class_weights = [0.2, 0.3, 0.5] self.sfl_unequal = BalancedSparseFocalLoss(self.class_weights, gamma=self.gamma)
def test_equal_weights_reduction_1d(self):
    """Testing equal weights reductions for 1D data"""
    y_true = [1, 2]
    y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]

    self.scce = losses.SparseCategoricalCrossentropy(
        reduction=losses.Reduction.SUM)
    self.bscce_equal = BalancedSCCE([1, 1, 1],
                                    reduction=losses.Reduction.SUM)
    scce = self.scce(y_true, y_pred).numpy()
    bscce = self.bscce_equal(y_true, y_pred).numpy()
    np.testing.assert_array_equal(scce, bscce)

    self.scce = losses.SparseCategoricalCrossentropy(
        reduction=losses.Reduction.NONE)
    self.bscce_equal = BalancedSCCE([1, 1, 1],
                                    reduction=losses.Reduction.NONE)
    scce = self.scce(y_true, y_pred).numpy()
    bscce = self.bscce_equal(y_true, y_pred).numpy()
    np.testing.assert_array_equal(scce, bscce)