def import_inception(config, input_layer, learning_rate=LEARNING_RATE):
    """Build and compile an InceptionV3-based scorer on top of ``input_layer``.

    The input is adapted to InceptionV3's requirements (spatial size >= 75,
    3 channels), topped with global average pooling and a single sigmoid
    unit, and compiled with MSE loss and RAdam.

    Args:
        config: dict-like with 'INPUT_SHAPE' (H, W, C) and 'ACTIVATION'.
        input_layer: Keras input tensor the model is built on.
        learning_rate: RAdam learning rate.

    Returns:
        The compiled Keras ``Model`` named "inception".
    """
    input_shape = config.get('INPUT_SHAPE')
    if input_shape[0] < 75 or input_shape[1] < 75:
        # minimal input for InceptionV3: upscale small inputs to 75x75
        input_layer_resize = Lambda(
            lambda x: K.tf.image.resize_bilinear(x, (75, 75)))(input_layer)
    else:
        input_layer_resize = input_layer
    # InceptionV3 expects 3 channels: replicate grayscale, pass RGB through,
    # or project any other channel count with a 1x1 convolution.
    if input_shape[-1] == 1:
        input_layer_tricanals = concatenate(
            [input_layer_resize, input_layer_resize, input_layer_resize])
    elif input_shape[-1] == 3:
        input_layer_tricanals = input_layer_resize
    else:
        input_layer_tricanals = Conv2D(3, (1, 1))(input_layer_resize)
    # NOTE(review): 'classes' and 'activation' are not kwargs of stock
    # keras.applications.InceptionV3 — this presumably targets a patched
    # build; verify against the imported InceptionV3 signature.
    inception_base_model = InceptionV3(input_tensor=input_layer_tricanals,
                                       classes=1,
                                       include_top=False,
                                       activation=config['ACTIVATION'],
                                       weights=None)
    x = inception_base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1, activation='sigmoid')(x)
    inception = Model(input_layer, outputs=[x], name="inception")
    inception.compile(loss='mse', optimizer=RAdamOptimizer(learning_rate))
    return inception
def create_loss_optimizer(self):
    """Build the embedding objective (distribution divergence + L2) and its
    RAdam training op.

    Side effects: defines ``e_divergence_cost``, ``e_div_cost_m``,
    ``L2_loss``, ``embedding_loss``, ``optimizer``, ``train_step`` and
    ``losses`` on the instance.
    """
    print('[*] Defining Loss Functions and Optimizer...')
    with tf.variable_scope('e_divergence_cost', reuse=self.config.reuse):
        # Kernel matrix computed over the latent embedding of the batch.
        kerneled_embedding = kernels.get_kernel(
            X=self.latent,
            batch_size=self.config.batch_size,
            param=self.config.df,
            epsilon=self.config.epsilon,
            kernel=self.config.kernel_mode)
        # Divergence between the precomputed joint probabilities and the
        # kerneled embedding distribution.
        self.e_divergence_cost = losses.get_distributions_div_cost(
            self.joint_probabilities_batch, kerneled_embedding,
            self.config.e_div_cost)
        self.e_div_cost_m = tf.reduce_mean(self.e_divergence_cost)
    with tf.variable_scope("L2_loss", reuse=self.config.reuse):
        # L2 penalty over every trainable variable in the graph.
        tv = tf.trainable_variables()
        self.L2_loss = tf.reduce_sum([tf.nn.l2_loss(v) for v in tv])
    with tf.variable_scope('embedding_loss', reuse=self.config.reuse):
        # Total objective: mean divergence + weighted L2 regularization.
        self.embedding_loss = tf.add(tf.reduce_mean(self.e_divergence_cost),
                                     self.config.l2 * self.L2_loss,
                                     name='embedding_loss')
    with tf.variable_scope("optimizer", reuse=self.config.reuse):
        self.optimizer = RAdamOptimizer(self.lr)
        self.train_step = self.optimizer.minimize(
            self.embedding_loss, global_step=self.global_step_tensor)
    # Display names for the tracked loss components.
    self.losses = [
        'Embedding_loss', 'E_diverg_{}'.format(self.config.e_div_cost),
        'Regul_L2'
    ]
def create_loss_optimizer(self):
    """Build the DIP-covariance-AE objective (reconstruction + L2 +
    covariance regularizer) and its RAdam training op.

    Side effects: defines ``reconstruction``, ``loss_reconstruction_m``,
    ``L2_loss``, ``ae_loss``, ``covar_reg``, ``dipae_loss``, ``optimizer``,
    ``train_step`` and ``losses`` on the instance.
    """
    print('[*] Defining Loss Functions and Optimizer...')
    with tf.name_scope('reconstruct'):
        # Per-sample reconstruction loss between input and reconstruction.
        self.reconstruction = losses.get_reconst_loss(
            self.x_batch_flat, self.x_recons_flat, self.config.reconst_loss)
        self.loss_reconstruction_m = tf.reduce_mean(self.reconstruction)
    with tf.variable_scope('L2_loss', reuse=self.config.reuse):
        # L2 penalty over every trainable variable.
        tv = tf.trainable_variables()
        self.L2_loss = tf.reduce_sum([tf.nn.l2_loss(v) for v in tv])
    with tf.variable_scope('encoder_loss', reuse=self.config.reuse):
        # Autoencoder loss: mean reconstruction + weighted L2.
        self.ae_loss = tf.add(tf.reduce_mean(self.reconstruction),
                              self.config.l2 * self.L2_loss,
                              name='encoder_loss')
    with tf.variable_scope('dipae_loss', reuse=self.config.reuse):
        # DIP regularizer over the encoder's moments (see self.regularizer).
        self.covar_reg = self.regularizer(self.encoder_mean,
                                          self.encoder_var)
        self.dipae_loss = tf.add(self.ae_loss, self.covar_reg)
    with tf.variable_scope("optimizer", reuse=self.config.reuse):
        self.optimizer = RAdamOptimizer(self.lr)
        self.train_step = self.optimizer.minimize(
            self.dipae_loss, global_step=self.global_step_tensor)
    # Display names for the tracked loss components.
    self.losses = [
        'ELBO_DIPcovAE', 'Regul_Covariance_Prior', 'AE',
        'Recons_{}'.format(self.config.reconst_loss), 'Regul_L2'
    ]
def import_model_v3(config=CONFIG, name=DEFAULT_NAME, weight_root=WEIGHT_ROOT,
                    summary_root=SUMMARY_ROOT, load=LOAD,
                    additional_input_number=0):
    """Create and compile an InceptionV3-style classifier from ``config``.

    Reads INPUT_SHAPE and LABELS plus optional ACTIVATION, LAST_ACTIVATION,
    LEARNING_RATE, LOSS and METRICS entries, attaches bookkeeping attributes
    (labels, name, weight/summary filenames), optionally restores saved
    weights, and writes a model summary file.
    """
    input_shape = config.get('INPUT_SHAPE')
    labels = config.get('LABELS')
    activation = config.get('ACTIVATION', 'relu')
    last_activation = config.get('LAST_ACTIVATION', 'softmax')
    learning_rate = config.get('LEARNING_RATE', LEARNING_RATE)
    loss = config.get('LOSS', DEFAULT_LOSS)
    metrics = config.get('METRICS', DEFAULT_METRICS)

    # Pick the single- or multi-input builder.
    if additional_input_number:
        model = build_multi_input_inception_v3(input_shape, activation,
                                               last_activation, labels,
                                               additional_input_number)
    else:
        model = build_inception_v3(input_shape, activation, last_activation,
                                   labels)
    model.compile(optimizer=RAdamOptimizer(learning_rate), loss=loss,
                  metrics=metrics)

    # Bookkeeping attributes consumed elsewhere in the project.
    model.labels = labels
    model.name = name
    model.weight_filename = os.path.join(weight_root, f"{name}.h5")
    model.summary_filename = os.path.join(summary_root, f"{name}.txt")
    if load:
        print('load weights')
        model.load_weights(model.weight_filename)
    write_summary(model)
    return model
def test_training_warmup(self):
    """Training smoke test for RAdam with a warmup schedule (tf.keras only)."""
    if not TF_KERAS:
        return
    from keras_radam.training import RAdamOptimizer
    warmup_optimizer = RAdamOptimizer(total_steps=38400,
                                      warmup_proportion=0.1,
                                      min_lr=1e-6)
    self._test_fit(warmup_optimizer)
def get_model_residual_concat_radam():
    """Build the residual-concat model and compile it with RAdam (warmup)."""
    radam = RAdamOptimizer(total_steps=5000,
                           warmup_proportion=0.1,
                           min_lr=1e-5)
    net = create_model_residual_concat()
    net.compile(optimizer=radam,
                loss='categorical_crossentropy',
                metrics=['accuracy'])
    return net
def test_model(test_trees, labels, embeddings, embedding_lookup, opt):
    """Evaluate a TreeCaps network on ``test_trees``: restore the latest
    checkpoint from ``opt.model_path`` and print a classification report,
    confusion matrix and accuracy.

    NOTE(review): despite building a train op and printing "Continue
    training" / "training accuracy", this function only runs inference;
    ``batch_size``, ``epochs``, ``train_step`` and ``checkfile`` are
    created but never used.
    """
    logdir = opt.model_path
    batch_size = opt.train_batch_size  # unused
    epochs = opt.niter                 # unused
    num_feats = len(embeddings[0])
    random.shuffle(test_trees)
    # build the inputs and outputs of the network
    nodes_node, children_node, codecaps_node = network.init_net_treecaps(
        num_feats, len(labels))
    out_node = network.out_layer(codecaps_node)
    labels_node, loss_node = network.loss_layer(codecaps_node, len(labels))
    optimizer = RAdamOptimizer(opt.lr)
    train_step = optimizer.minimize(loss_node)  # built but never run
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    with tf.name_scope('saver'):
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(logdir)
        if ckpt and ckpt.model_checkpoint_path:
            print("Continue training with old model")
            saver.restore(sess, ckpt.model_checkpoint_path)
        for i, var in enumerate(saver._var_list):
            print('Var {}: {}'.format(i, var))
    checkfile = os.path.join(logdir, 'tree_network.ckpt')  # unused
    correct_labels = []
    predictions = []
    print('Computing training accuracy...')
    # Run one tree at a time (batch size 1) and collect argmax predictions.
    for batch in sampling.batch_samples(
            sampling.gen_samples(test_trees, labels, embeddings,
                                 embedding_lookup), 1):
        nodes, children, batch_labels = batch
        output = sess.run([out_node],
                          feed_dict={
                              nodes_node: nodes,
                              children_node: children,
                          })
        correct_labels.append(np.argmax(batch_labels))
        predictions.append(np.argmax(output))
    target_names = list(labels)
    print(
        classification_report(correct_labels, predictions,
                              target_names=target_names))
    print(confusion_matrix(correct_labels, predictions))
    print('*' * 50)
    print('Accuracy:', accuracy_score(correct_labels, predictions))
    print('*' * 50)
def get_model(args):
    """Build and compile an image classifier selected by ``args.base_modelname``.

    Supported backbones: 'inceptionV3' and 'inceptionResNetV2' (Flatten
    head), 'MobileNetV2' (global-average-pooling head); any other value
    falls back to a small dense network on raw pixels. Every variant ends
    in a softmax over ``train_generator.num_classes`` classes and is
    compiled with RAdam + categorical crossentropy.

    Bug fixed: the original 'inceptionV3' branch only created the backbone
    and never built a head or a ``Model``, so the final ``model.compile``
    raised UnboundLocalError. That branch now shares the Flatten-based head
    used by 'inceptionResNetV2'.
    """
    logging.info('get model')
    # Get the InceptionV3 model so we can do transfer learning
    if args.base_modelname in ('inceptionV3', 'inceptionResNetV2'):
        if args.base_modelname == 'inceptionV3':
            base_model = InceptionV3(weights='imagenet', include_top=False,
                                     input_shape=(299, 299, 3))
        else:
            base_model = InceptionResNetV2(weights='imagenet',
                                           include_top=False,
                                           input_shape=(args.IMG_WIDTH,
                                                        args.IMG_WIDTH, 3))
        out = base_model.output
        out = Flatten()(out)
        # out = GlobalAveragePooling2D()(out)
        out = Dense(512, activation='relu')(out)
        out = Dropout(0.5)(out)
        out = Dense(512, activation='relu')(out)
        out = Dropout(0.5)(out)
        total_classes = train_generator.num_classes
        predictions = Dense(total_classes, activation='softmax')(out)
        model = Model(inputs=base_model.input, outputs=predictions)
    elif args.base_modelname == 'MobileNetV2':
        base_model = MobileNetV2(weights='imagenet', include_top=False,
                                 input_shape=(args.IMG_WIDTH, args.IMG_WIDTH,
                                              3))
        out = base_model.output
        out = GlobalAveragePooling2D()(out)
        out = Dense(512, activation='relu')(out)
        out = Dropout(0.5)(out)
        out = Dense(512, activation='relu')(out)
        out = Dropout(0.5)(out)
        total_classes = train_generator.num_classes
        predictions = Dense(total_classes, activation='softmax')(out)
        model = Model(inputs=base_model.input, outputs=predictions)
    else:
        # Fallback: tiny dense classifier directly on pooled pixels.
        x = Input(shape=(args.IMG_WIDTH, args.IMG_WIDTH, 3))
        out = GlobalAveragePooling2D()(x)
        out = Dense(128, activation='relu')(out)
        out = Dropout(0.5)(out)
        total_classes = train_generator.num_classes
        predictions = Dense(total_classes, activation='softmax')(out)
        model = Model(inputs=x, outputs=predictions)
    model.compile(optimizer=RAdamOptimizer(learning_rate=1e-3),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    # model.summary()
    return model
def __init__(self, weight_root=WEIGHT_ROOT, summary_root=SUMMARY_ROOT,
             load=LOAD, learning_rate=LEARNING_RATE, config=CONFIG,
             name=DEFAULT_NAME, skip=True, metrics=None, labels=None):
    """Assemble an adversarial autoencoder: a U-Net generator feeding a
    frozen InceptionV3 discriminator, compiled together with RAdam.

    Args:
        weight_root/summary_root: directories for weight and summary files.
        load: whether the generator restores saved weights.
        learning_rate: used for generator, discriminator and combined model.
        config: project config dict (supplies 'LOSS' among others).
        name: base name for the combined model and its files.
        skip/metrics/labels: forwarded to ``import_unet_model``.
    """
    self.discriminator_loss = mean_squared_error
    # Map the configured generator loss name to a callable.
    self.generator_loss = config.get('LOSS', 'mse')
    if self.generator_loss == 'mse':
        self.generator_loss = mean_squared_error
    elif self.generator_loss == 'WBCE':
        self.generator_loss = weighted_binary_crossentropy
    self.generator = import_unet_model(weight_root=weight_root,
                                       summary_root=summary_root,
                                       load=load,
                                       learning_rate=learning_rate,
                                       config=config,
                                       name="unet",
                                       skip=skip,
                                       metrics=metrics,
                                       labels=labels)
    input_layer = Input(self.generator.input_shape[1:])
    generator_prediction = self.generator(input_layer)
    self.discriminator = import_inception(config, input_layer,
                                          learning_rate=learning_rate)
    # Freeze the discriminator inside the combined model so only the
    # generator trains through the adversarial objective.
    self.discriminator.trainable = False
    discriminator_prediction = self.discriminator(generator_prediction)
    self.adversarial_autoencoder = Model(
        input_layer, [generator_prediction, discriminator_prediction],
        name=name)
    # Equal weighting of reconstruction and adversarial losses.
    self.loss_weights = [1., 1.]
    self.adversarial_autoencoder.compile(
        loss=[self.generator_loss, self.discriminator_loss],
        loss_weights=self.loss_weights,
        optimizer=RAdamOptimizer(learning_rate))
    self.name = name
    self.weight_filename = os.path.join(weight_root, f"{self.name}.h5")
    self.summary_filename = os.path.join(summary_root, f"{self.name}.txt")
    # The generator shares this wrapper's file names and name.
    self.generator.weight_filename = self.weight_filename
    self.generator.summary_filename = self.summary_filename
    self.generator.name = self.name
    self.adversarial_autoencoder.summary_filename = self.summary_filename
    write_summary(self.adversarial_autoencoder)
def create_loss_optimizer(self):
    """Build the annealed Bayesian VAE objective (reconstruction + L2 + KL
    annealing + Bayesian divergence) and its RAdam training op.

    Bugs fixed:
      * two scopes read ``self.config.config.reuse`` (copy/paste typo) —
        every sibling scope uses ``self.config.reuse``;
      * ``annbayvae_loss`` was created with the duplicate op name
        'bayvae_loss'; it now gets its own name.
    """
    print('[*] Defining Loss Functions and Optimizer...')
    with tf.name_scope('prior_recons'):
        # Reconstruction of samples drawn from the prior.
        self.prior_recons = losses.get_reconst_loss(
            self.sample_flat, self.sample_recons_flat,
            self.config.prior_reconst_loss)
        self.prior_recons_m = tf.reduce_mean(self.prior_recons)
    with tf.name_scope('reconstruct'):
        self.reconstruction = losses.get_reconst_loss(
            self.x_batch_flat, self.x_recons_flat, self.config.reconst_loss)
        self.loss_reconstruction_m = tf.reduce_mean(self.reconstruction)
    with tf.variable_scope('L2_loss', reuse=self.config.reuse):
        tv = tf.trainable_variables()
        # Only posterior ('post_') variables are L2-regularized.
        self.L2_loss = tf.reduce_sum(
            [tf.nn.l2_loss(v) for v in tv if 'post_' in v.name])
    with tf.variable_scope('encoder_loss', reuse=self.config.reuse):
        # FIX: was reuse=self.config.config.reuse
        self.ae_loss = tf.add(tf.reduce_mean(self.reconstruction),
                              self.config.l2 * self.L2_loss,
                              name='encoder_loss')
    with tf.variable_scope('divergence_cost', reuse=self.config.reuse):
        # FIX: was reuse=self.config.config.reuse
        self.divergence_cost = losses.get_self_divergence(
            self.encoder_mean, self.encoder_logvar, self.config.div_cost)
        self.div_cost_m = tf.reduce_mean(self.divergence_cost)
    with tf.variable_scope('vae_loss', reuse=self.config.reuse):
        self.vae_loss = tf.add(self.ae_loss, self.div_cost_m)
    with tf.variable_scope('annvae_loss', reuse=self.config.reuse):
        # Capacity-annealed KL penalty: gamma * |KL - c(t)|.
        c = self.anneal(self.config.c_max, self.global_step_tensor,
                        self.config.itr_thd)
        self.anneal_reg = self.config.ann_gamma * tf.math.abs(
            self.div_cost_m - c)
        self.annvae_loss = tf.add(self.ae_loss, self.anneal_reg)
    with tf.variable_scope('bayae_loss', reuse=self.config.reuse):
        # Bayesian divergence to the (possibly MC-sampled) prior; conv nets
        # carry the prior flattened, so reshape per MC sample.
        if self.config.isConv:
            prior_shape = [self.config.MC_samples, self.config.batch_size,
                           self.config.latent_dim]
            self.bay_div = -1 * losses.get_divergence(
                self.encoder_mean, self.encoder_var,
                tf.reshape(self.prior_mean, prior_shape),
                tf.reshape(self.prior_var, prior_shape),
                self.config.prior_div_cost)
        else:
            self.bay_div = -1 * losses.get_divergence(
                self.encoder_mean, self.encoder_var, self.prior_mean,
                self.prior_var, self.config.prior_div_cost)
        ntrain = tf.cast(self.config.ntrain_batches, 'float32')
        self.bayae_loss = tf.add(ntrain * self.ae_loss, self.bay_div,
                                 name='bayae_loss')
        self.bayvae_loss = tf.add(ntrain * self.vae_loss, self.bay_div,
                                  name='bayvae_loss')
        # FIX: op name was the duplicate 'bayvae_loss'.
        self.annbayvae_loss = tf.add(ntrain * self.annvae_loss,
                                     self.bay_div, name='annbayvae_loss')
    with tf.variable_scope('optimizer', reuse=self.config.reuse):
        self.optimizer = RAdamOptimizer(self.lr)
        self.train_step = self.optimizer.minimize(
            self.annbayvae_loss, global_step=self.global_step_tensor)
    # Display names for the tracked loss components.
    self.losses = [
        'ELBO_AnnBayVAE', 'BayVAE', 'BayAE', 'AE',
        'Recons_{}'.format(self.config.reconst_loss),
        'Div_{}'.format(self.config.div_cost), 'Regul_anneal_reg',
        'Regul_L2',
        'prior_recons_{}'.format(self.config.prior_reconst_loss),
        'bayesian_div_{}'.format(self.config.prior_div_cost)
    ]
def import_model(weight_root=WEIGHT_ROOT, summary_root=SUMMARY_ROOT,
                 load=LOAD, learning_rate=LEARNING_RATE, config=CONFIG,
                 name=DEFAULT_NAME, skip=True, metrics=None, labels=None,
                 additional_input_number=0):
    """Build, compile and optionally restore a U-Net model from ``config``.

    Reads layer/shape/activation/loss settings from the config dict,
    selects the single- or multi-input builder, attaches name and file
    attributes, optionally loads saved weights, and writes a summary file.
    """
    conv_layers = config['CONV_LAYERS']
    central_shape = config.get('CENTRAL_SHAPE', conv_layers[-1] * 2)
    learning_rate = config.get('LEARNING_RATE', learning_rate)
    batch_normalization = config.get('BATCH_NORMALIZATION', False)
    input_shape = config.get('INPUT_SHAPE', (512, 512, 3))
    output_shape = config.get('OUTPUT_SHAPE', input_shape)
    last_activation = config.get('LAST_ACTIVATION', 'sigmoid')

    activation = config.get('ACTIVATION', 'relu')
    # 'sin' maps to the backend sine function; other names pass through.
    activation = K.sin if activation == 'sin' else activation

    raw_loss = config.get('LOSS', 'mse')
    loss = LOSS_TRANSLATION.get(raw_loss, raw_loss)

    # A label list that does not match the output channel count is replaced
    # by generic class names.
    if labels is not None and len(labels) != output_shape[-1]:
        labels = [f'class_{i}' for i in range(output_shape[-1])]

    optimizer = RAdamOptimizer(learning_rate)
    metrics = get_additional_metrics(metrics, loss, labels)

    builder_args = (input_shape, activation, batch_normalization,
                    conv_layers, skip, last_activation, output_shape,
                    central_shape)
    if additional_input_number:
        model = build_multi_input_unet(*builder_args,
                                       additional_input_number)
    else:
        model = build_unet(*builder_args)
    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

    model.name = name
    model.summary_filename = os.path.join(summary_root, f"{name}.txt")
    model.weight_filename = os.path.join(weight_root, f"{name}.h5")
    if load:
        print('load weights')
        model.load_weights(model.weight_filename)
    write_summary(model)
    return model
def create_loss_optimizer(self):
    """Build the beta-TC-VAE objective (reconstruction + L2 + total
    correlation regularization) and its RAdam training op.

    Side effects: defines ``reconstruction``, ``L2_loss``, ``ae_loss``,
    ``divergence_cost``, ``vae_loss``, ``bvae_loss``, ``btcvae_loss``,
    ``optimizer``, ``train_step`` and ``losses`` on the instance.
    """
    print('[*] Defining Loss Functions and Optimizer...')
    with tf.name_scope('reconstruct'):
        self.reconstruction = losses.get_reconst_loss(
            self.x_batch_flat, self.x_recons_flat, self.config.reconst_loss)
        self.loss_reconstruction_m = tf.reduce_mean(self.reconstruction)
    with tf.variable_scope('L2_loss', reuse=self.config.reuse):
        # L2 penalty over every trainable variable.
        tv = tf.trainable_variables()
        self.L2_loss = tf.reduce_sum([tf.nn.l2_loss(v) for v in tv])
    with tf.variable_scope('encoder_loss', reuse=self.config.reuse):
        self.ae_loss = tf.add(tf.reduce_mean(self.reconstruction),
                              self.config.l2 * self.L2_loss,
                              name='encoder_loss')
    with tf.variable_scope('divergence_cost', reuse=self.config.reuse):
        # Divergence of the encoder posterior (e.g. KL to the prior).
        self.divergence_cost = losses.get_self_divergence(
            self.encoder_mean, self.encoder_logvar, self.config.div_cost)
        self.div_cost_m = tf.reduce_mean(self.divergence_cost)
    with tf.variable_scope('vae_loss', reuse=self.config.reuse):
        self.vae_loss = tf.add(self.ae_loss, self.div_cost_m)
    with tf.variable_scope('bvae_loss', reuse=self.config.reuse):
        # Beta-VAE: scale the divergence term by beta.
        self.beta_reg = tf.multiply(self.config.beta, self.div_cost_m)
        self.bvae_loss = tf.add(self.ae_loss, self.beta_reg)
    with tf.variable_scope('btcvae_loss', reuse=self.config.reuse):
        """ Based on Equation 4 with alpha = gamma = 1 of "Isolating Sources
        of Disentanglement in Variational Autoencoders"
        (https: // arxiv.org / pdf / 1802.04942).
        If alpha = gamma = 1, Eq 4 can be written as ELBO + (1 - beta) * TC.
        """
        # Total correlation term weighted by (1 - beta).
        tc = tf.multiply(
            1 - self.config.beta,
            self.total_correlation(self.latent_batch, self.encoder_mean,
                                   self.encoder_logvar))
        self.tc_beta_reg = tf.add(self.div_cost_m, tc)
        self.btcvae_loss = tf.add(self.ae_loss, self.tc_beta_reg)
    with tf.variable_scope("optimizer", reuse=self.config.reuse):
        self.optimizer = RAdamOptimizer(self.lr)
        self.train_step = self.optimizer.minimize(
            self.btcvae_loss, global_step=self.global_step_tensor)
    # Display names for the tracked loss components.
    self.losses = [
        'ELBO_Beta-TC-VAE', 'Beta-VAE', 'VAE', 'AE',
        'Recons_{}'.format(self.config.reconst_loss), 'Regul_tc_beta_reg',
        'Regul_beta_reg', 'Div_{}'.format(self.config.div_cost), 'Regul_L2'
    ]
def _set_optimizer(self):
    """Return the TF optimizer selected by ``self.optimizer_type``.

    Supported values: "ADAM", "NADAM", "RADAM"; anything else raises
    ValueError. All optimizers read ``self.learning_rate_var``.
    """
    opt_type = self.optimizer_type
    if opt_type == "ADAM":
        return tf.compat.v1.train.AdamOptimizer(
            learning_rate=self.learning_rate_var)
    if opt_type == "NADAM":
        return tf.contrib.opt.NadamOptimizer(
            learning_rate=self.learning_rate_var)
    if opt_type == "RADAM":
        return RAdamOptimizer(learning_rate=self.learning_rate_var)
    raise ValueError(
        "{} is not a supported optimizer type. Supported optimizer types: "
        "ADAM, NADAM, RADAM.".format(opt_type))
def create_loss_optimizer(self):
    """Build the Bayesian autoencoder (BayAE) objective and its RAdam
    training op.

    Side effects: defines ``reconstruction``, ``prior_recons``, ``L2_loss``,
    ``ae_loss``, ``bay_div``, ``bayae_loss``, ``optimizer``, ``train_step``
    and ``losses`` on the instance.
    """
    print('[*] Defining Loss Functions and Optimizer...')
    with tf.name_scope('reconstruct'):
        self.reconstruction = losses.get_reconst_loss(
            self.x_batch_flat, self.x_recons_flat, self.config.reconst_loss)
        self.loss_reconstruction_m = tf.reduce_mean(self.reconstruction)
    with tf.name_scope('prior_recons'):
        # Reconstruction of samples drawn from the prior.
        self.prior_recons = losses.get_reconst_loss(
            self.sample_flat, self.sample_recons_flat,
            self.config.prior_reconst_loss)
        self.prior_recons_m = tf.reduce_mean(self.prior_recons)
    with tf.variable_scope("L2_loss", reuse=self.config.reuse):
        tv = tf.trainable_variables()
        # Only posterior ('post_') variables are L2-regularized.
        self.L2_loss = tf.reduce_sum(
            [tf.nn.l2_loss(v) for v in tv if 'post_' in v.name])
    with tf.variable_scope('encoder_loss', reuse=self.config.reuse):
        # AE loss = mean reconstruction + weighted L2 + prior reconstruction.
        self.ae_loss = tf.add(tf.reduce_mean(self.reconstruction),
                              self.config.l2 * self.L2_loss,
                              name='encoder_loss') + self.prior_recons_m
    with tf.variable_scope('bayae_loss', reuse=self.config.reuse):
        # KL between posterior and prior; conv nets carry the prior
        # flattened, so reshape per MC sample first.
        if self.config.isConv:
            self.bay_div = -1 * losses.get_QP_kl(
                self.post_mean, self.post_var,
                tf.reshape(self.prior_mean,
                           [self.config.MC_samples, self.config.batch_size,
                            self.config.latent_dim]),
                tf.reshape(self.prior_var,
                           [self.config.MC_samples, self.config.batch_size,
                            self.config.latent_dim]))
        else:
            self.bay_div = -1 * losses.get_QP_kl(self.post_mean,
                                                 self.post_var,
                                                 self.prior_mean,
                                                 self.prior_var)
        # Scale the AE term by the number of training batches.
        self.bayae_loss = tf.add(
            tf.cast(self.config.ntrain_batches, 'float32') * self.ae_loss,
            self.bay_div,
            name='bayae_loss')
    with tf.variable_scope("optimizer", reuse=self.config.reuse):
        self.optimizer = RAdamOptimizer(self.lr)
        self.train_step = self.optimizer.minimize(
            self.bayae_loss, global_step=self.global_step_tensor)
    # Display names for the tracked loss components.
    self.losses = [
        'ELBO_BayAE', 'AE', 'Recons_{}'.format(self.config.reconst_loss),
        'Regul_L2',
        'prior_recons_{}'.format(self.config.prior_reconst_loss),
        'bayesian_div_{}'.format(self.config.prior_div_cost)
    ]
def compile_model(self, optimizer, losses, metrics):
    """Compile the wrapped model after restoring any checkpoint.

    ``optimizer`` is a case-insensitive name ('radam', 'adam', 'adagrad',
    'sgd', 'rmsprop') mapped to an optimizer instance built with
    ``self._learning_rate``; an unrecognized value is passed to Keras
    unchanged (matching the original behavior).
    """
    self._maybe_load_checkpoint()
    factories = {
        'radam': lambda: RAdamOptimizer(total_steps=1000,
                                        warmup_proportion=0.1,
                                        learning_rate=self._learning_rate,
                                        min_lr=1e-8),
        'adam': lambda: Adam(lr=self._learning_rate),
        'adagrad': lambda: Adagrad(lr=self._learning_rate),
        'sgd': lambda: SGD(lr=self._learning_rate, momentum=0.9),
        'rmsprop': lambda: RMSprop(lr=self._learning_rate),
    }
    factory = factories.get(optimizer.lower())
    if factory is not None:
        optimizer = factory()
    self._optimizer = optimizer
    return self._model.compile(optimizer=self._optimizer,
                               loss=losses,
                               metrics=metrics)
def train(self):
    """5-fold cross-validated training with early stopping.

    Returns:
        (y_test, y_vals): majority-voted test predictions across folds and
        out-of-fold validation predictions.
    """
    train, test, label = self.data['x_train'], self.data[
        'x_test'], self.data['y_train']
    # NOTE(review): batch_size is assigned but never used below.
    folds, batch_size = 5, 256
    y_vals = np.zeros(len(label))            # out-of-fold predictions
    y_test = np.zeros((len(test), folds))    # per-fold test predictions
    kf = KFold(n_splits=folds, shuffle=True, random_state=10)
    for fold_n, (train_index, val_index) in enumerate(kf.split(label)):
        model = self.create_model()
        model.compile(loss='categorical_crossentropy',
                      optimizer=RAdamOptimizer(learning_rate=1e-4))
        patient, best_score = 0, 0
        x_trn, y_trn = train[train_index], label[train_index]
        x_val, y_val = train[val_index], label[val_index]
        for epoch in range(100):
            generator = batch_iter(x_trn, y_trn, self.vocab)
            for x_batch, y_batch in generator:
                # One-hot encode the 3-class labels on the fly.
                model.train_on_batch([x_batch], [np.eye(3)[y_batch]])
            x_val_tok = seq_padding(x_val, self.vocab)
            y_val_pre = model.predict(x_val_tok)
            # Index of the largest probability is the predicted class.
            y_val_pre = np.argmax(y_val_pre, -1)
            score = f1(y_val, y_val_pre)
            # ========== early stopping ========== #
            if score > best_score:
                patient = 0
                best_score = score
                y_vals[val_index] = y_val_pre
                model.save_weights('weight')
            print('epoch:{}, score:{}, best_score:{}'.format(
                epoch, score, best_score))
            patient += 1
            if patient >= 8:
                break
        # ===== reload the best weights and predict the test set ===== #
        model.load_weights('weight')
        test_tok = seq_padding(test, self.vocab)
        predict = model.predict([test_tok])
        y_test[:, fold_n] = np.argmax(predict, -1)
        print("=" * 50)
    # Majority vote across folds decides the final test prediction.
    y_test = stats.mode(y_test, axis=1)[0].reshape(-1)
    print("=" * 50)
    print('final score: ', f1(label, y_vals))
    return y_test, y_vals
def create_autoencoder(input_dim, output_dim, noise=0.05):
    """Build a denoising autoencoder with an auxiliary regression head.

    The encoder normalizes and corrupts the input with Gaussian noise
    before a 128-unit bottleneck; the 'decoded' head reconstructs the
    input (MSE) and the 'ata_output' head regresses ``output_dim`` targets
    (MAPE). Returns ``(autoencoder, encoder)``.
    """
    inputs = L.Input(input_dim)
    # Encoder: normalize, inject noise, project to 128 units.
    hidden = L.BatchNormalization()(inputs)
    hidden = L.GaussianNoise(noise)(hidden)
    hidden = L.Dense(128, activation='relu')(hidden)
    # Reconstruction head.
    reconstructed = L.Dropout(0.2)(hidden)
    reconstructed = L.Dense(input_dim, name='decoded')(reconstructed)
    # Auxiliary regression head stacked on the reconstruction.
    aux = L.Dense(64, activation='relu')(reconstructed)
    aux = L.BatchNormalization()(aux)
    aux = L.Dropout(0.2)(aux)
    aux = L.Dense(output_dim, activation='linear', name='ata_output')(aux)
    encoder = keras.models.Model(inputs=inputs, outputs=reconstructed)
    autoencoder = keras.models.Model(inputs=inputs,
                                     outputs=[reconstructed, aux])
    autoencoder.compile(optimizer=RAdamOptimizer(learning_rate=1e-3),
                        loss={
                            'decoded': 'mse',
                            'ata_output': 'mape'
                        })
    return autoencoder, encoder
def create_loss_optimizer(self):
    """Build the annealed Beta-VAE objective (reconstruction + L2 + beta-
    weighted divergence + capacity annealing) and its RAdam training op.

    Side effects: defines ``reconstruction``, ``L2_loss``, ``ae_loss``,
    ``divergence_cost``, ``vae_loss``, ``bvae_loss``, ``annvae_loss``,
    ``annbvae_loss``, ``optimizer``, ``train_step`` and ``losses``.
    """
    print('[*] Defining Loss Functions and Optimizer...')
    with tf.name_scope('reconstruct'):
        self.reconstruction = losses.get_reconst_loss(
            self.x_batch_flat, self.x_recons_flat, self.config.reconst_loss)
        self.loss_reconstruction_m = tf.reduce_mean(self.reconstruction)
    with tf.variable_scope('L2_loss', reuse=self.config.reuse):
        # L2 penalty over every trainable variable.
        tv = tf.trainable_variables()
        self.L2_loss = tf.reduce_sum([tf.nn.l2_loss(v) for v in tv])
    with tf.variable_scope('encoder_loss', reuse=self.config.reuse):
        self.ae_loss = tf.add(tf.reduce_mean(self.reconstruction),
                              self.config.l2 * self.L2_loss,
                              name='encoder_loss')
    with tf.variable_scope('divergence_cost', reuse=self.config.reuse):
        # Divergence of the encoder posterior (e.g. KL to the prior).
        self.divergence_cost = losses.get_self_divergence(
            self.encoder_mean, self.encoder_logvar, self.config.div_cost)
        self.div_cost_m = tf.reduce_mean(self.divergence_cost)
    with tf.variable_scope('vae_loss', reuse=self.config.reuse):
        self.vae_loss = tf.add(self.ae_loss, self.div_cost_m)
    with tf.variable_scope('bvae_loss', reuse=self.config.reuse):
        # Beta-VAE: divergence scaled by beta.
        self.beta_reg = tf.multiply(self.config.beta, self.div_cost_m)
        self.bvae_loss = tf.add(self.ae_loss, self.beta_reg)
    with tf.variable_scope('annvae_loss', reuse=self.config.reuse):
        # Capacity-annealed KL penalty: gamma * |KL - c(t)|.
        c = self.anneal(self.config.c_max, self.global_step_tensor,
                        self.config.itr_thd)
        self.anneal_reg = self.config.ann_gamma * tf.math.abs(
            self.div_cost_m - c)
        self.annvae_loss = tf.add(self.ae_loss, self.anneal_reg)
        # Annealed Beta-VAE: beta regularizer + annealing regularizer.
        self.annbvae_loss = tf.add(self.bvae_loss, self.anneal_reg)
    with tf.variable_scope("optimizer", reuse=self.config.reuse):
        self.optimizer = RAdamOptimizer(self.lr)
        self.train_step = self.optimizer.minimize(
            self.annbvae_loss, global_step=self.global_step_tensor)
    # Display names for the tracked loss components.
    # NOTE(review): 'Div_{}' appears twice in this list in the original —
    # possibly intentional (two divergence slots), verify downstream use.
    self.losses = [
        'ELBO_AnnBeta-VAE', 'Beta-VAE', 'annVAE', 'VAE', 'AE',
        'Recons_{}'.format(self.config.reconst_loss),
        'Div_{}'.format(self.config.div_cost), 'Regul_beta_reg',
        'Regul_anneal_reg', 'Div_{}'.format(self.config.div_cost),
        'Regul_L2'
    ]
def train_model(x_data, y_data, rain_data, k): k_fold = KFold(n_splits=k, shuffle=True, random_state=0) #stratified_k_fold = StratifiedKFold(n_splits=k, shuffle=True, random_state=0) model_number = 0 #for train_idx, val_idx in tqdm(stratified_k_fold.split(x_data, rain_data)): for train_idx, val_idx in tqdm(k_fold.split(x_data)): x_train, y_train = x_data[train_idx], y_data[train_idx] x_val, y_val = x_data[val_idx], y_data[val_idx] input_layer = Input(x_train.shape[1:]) output_layer = train2_unet2_model(input_layer, 32) model = Model(input_layer, output_layer) callbacks_list = [ # 스케쥴러? #tf.keras.callbacks.ReduceLROnPlateau( # monitor='val_loss', # patience=3, # factor=0.8 #), tf.keras.callbacks.ModelCheckpoint( filepath='./models/model' + str(model_number) + '.h5', monitor='score', save_best_only=True, #save_weights_only=True, verbose=1 ) ] model.compile(loss='mae', optimizer=RAdamOptimizer(learning_rate=1e-3) , metrics=[score, maeOverFscore_keras, fscore_keras]) # stratified_k_fold 사용시 batch_size는 최소 128에서 256이 되어야한다. model.fit(x_train, y_train, epochs=50, batch_size=128, shuffle=True, validation_data=(x_val, y_val), callbacks=callbacks_list) model_number += 1
def test_tensor(self):
    """Smoke-test RAdamOptimizer on a plain TF1 graph (no Keras): fit the
    line y = 2x + 6 with a two-parameter linear model and print the fit."""
    if not TF_KERAS or EAGER_MODE:
        return
    import tensorflow as tf
    from keras_radam.training import RAdamOptimizer
    x = tf.compat.v1.placeholder("float")
    y = tf.compat.v1.placeholder("float")
    w = tf.Variable([1.0, 2.0], name="w")  # [slope, intercept]
    y_model = tf.multiply(x, w[0]) + w[1]
    loss = tf.square(y - y_model)
    train_op = RAdamOptimizer().minimize(loss)
    model = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(model)
        for i in range(10000):
            # Samples drawn from the ground-truth line y = 2x + 6.
            x_value = np.random.rand()
            y_value = x_value * 2 + 6
            session.run(train_op, feed_dict={x: x_value, y: y_value})
        w_value = session.run(w)
        print("Predicted model: {a:.3f}x + {b:.3f}".format(a=w_value[0],
                                                           b=w_value[1]))
# NOTE(review): continuation of a model-building script; `model`, `lrelu`,
# `num_classes`, `gpus`, the training arrays and the save_* names are all
# defined earlier in the file.
# Dense classification head on top of the convolutional stack.
model.add(Flatten())
model.add(Dense(128))
model.add(BatchNormalization())
model.add(Activation(lrelu))
model.add(Dense(32))
model.add(BatchNormalization())
model.add(Activation(lrelu))
model.add(Dense(num_classes, activation='softmax'))
# Replicate the model across all available GPUs.
model = multi_gpu_model(model, gpus=len(gpus), cpu_merge=False)
model.compile(loss='categorical_crossentropy',
              optimizer=RAdamOptimizer(learning_rate=1e-4),
              metrics=['accuracy'])
model.summary()
history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    validation_data=(x_test, y_test))
# Save model and weights
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
model_path = os.path.join(save_dir, model_name)
model.save(model_path)
print('Saved trained model at %s ' % model_path)
def model_run():
    """Train and evaluate the dual-input CNN bug-localization model on
    time-ordered folds: fold 1 (oldest) is split 60/40 train/validation,
    then each later fold trains and is tested on the next one.
    """
    # define loss function, optimize, model
    lossFunction = tf.keras.losses.CategoricalCrossentropy(
    )  # custom_loss_optimization_function.CategoricalCE(w)
    opt = RAdamOptimizer(learning_rate=1e-4)
    model_CNN = multipleModel()  # CustomModel(input1, input2, output)
    model_CNN.compile(optimizer=opt, loss=lossFunction, metrics=["acc"])
    # fit data in model ==================================================
    # because AspectJ is not too big, split bug report into 3 folds, fold 1
    # is oldest, fold 3 is newest. Else, projects will split 10 folds.
    # fold 1 is split into 60% training, 40% validation. And fold 2
    # training - fold 3 test (fold 3 training - fold 4 test, ...)
    file = open(BUG_MATRIX, 'rb')
    bug_matrix = pickle.load(file)
    # # get the minimize bugs
    # bug_matrix = bug_matrix[:50]
    file.close()
    file = open(SOURCE_MATRIX, 'rb')
    source_matrix = pickle.load(file)
    file.close()
    num_fold = 3
    distance = len(bug_matrix) // num_fold
    # fold 1 is split into 60% training, 40% validation ==================
    fold_1 = bug_matrix[:distance]
    train = distance * 6 // 10
    bug_train = fold_1[:train]
    bug_val = fold_1[train:]
    # get data train and validation
    matrix_train, label_train = build_data_label.get_matrix_and_label(
        0, train)
    matrix_val, label_val = build_data_label.get_matrix_and_label(
        train, distance)
    bug_train, source_train = split_data(
        matrix_train, SENT_BUG)  # 114 is the height of bug matrix
    bug_val, source_val = split_data(matrix_val, SENT_BUG)
    # convert data to array; each sample is a (SENT_x, 300, 1) matrix of
    # 300-dim embeddings.
    bug_train = np.reshape(bug_train, (-1, SENT_BUG, 300, 1))
    bug_val = np.reshape(bug_val, (-1, SENT_BUG, 300, 1))
    source_train = np.reshape(source_train, (-1, SENT_SOURCE, 300, 1))
    source_val = np.reshape(source_val, (-1, SENT_SOURCE, 300, 1))
    bug_train = np.array(bug_train)
    bug_val = np.array(bug_val)
    source_train = np.array(source_train)
    source_val = np.array(source_val)
    label_train = np.array(label_train)
    label_val = np.array(label_val)
    model_CNN.fit(x=(bug_train, source_train), y=label_train, epochs=20,
                  batch_size=32,
                  validation_data=([bug_val, source_val], label_val))
    # fold 2 --> fold 10: fold k is training data, fold k+1 is test data,
    # k = 2..9 ===========================================================
    for fold in range(2, num_fold, 1):
        train_l = (fold - 1) * distance
        train_r = fold * distance
        test_l = fold * distance
        test_r = min((fold + 1) * distance,
                     len(bug_matrix) - 1)  # out of array
        # get data train and validation
        matrix_train, label_train = build_data_label.get_matrix_and_label(
            train_l, train_r)
        matrix_test, label_test = build_data_label.get_matrix_and_label_test(
            test_l, test_r)
        bug_train, source_train = split_data(
            matrix_train, SENT_BUG)  # 114 is the height of bug matrix
        bug_test, source_test = split_data(matrix_test, SENT_BUG)
        # convert data to array
        bug_train = np.reshape(bug_train, (-1, SENT_BUG, 300, 1))
        bug_test = np.reshape(bug_test, (-1, SENT_BUG, 300, 1))
        source_train = np.reshape(source_train, (-1, SENT_SOURCE, 300, 1))
        source_test = np.reshape(source_test, (-1, SENT_SOURCE, 300, 1))
        bug_train = np.array(bug_train)
        bug_test = np.array(bug_test)
        source_train = np.array(source_train)
        source_test = np.array(source_test)
        label_train = np.array(label_train)
        label_test = np.array(label_test)
        model_CNN.fit(x=(bug_train, source_train), y=label_train, epochs=20,
                      batch_size=64)
        # test model for each fold
        (loss, acc) = model_CNN.evaluate([bug_test, source_test], label_test)
        print(
            "==========>>>> [INFO] test accuracy: {:.4f}, loss: {:.4f}".format(
                acc, loss),
            end="")
        predict = model_CNN.predict([bug_test, source_test])
        for i in range(10):
            print(label_test[i], '==', predict[i])
        # Compare positive-class counts: ground truth vs. predictions.
        count = 0
        for i in label_test:
            if i[0] == 1:
                count += 1
        count_pre = 0
        for i in predict:
            if i[0] > 0.5:
                count_pre += 1
        print("label 1: ", count, count_pre)
        # Compare negative-class counts: ground truth vs. predictions.
        count = 0
        for i in label_test:
            if i[0] == 0:
                count += 1
        count_pre = 0
        for i in predict:
            if i[0] < 0.5:
                count_pre += 1
        print("label 0: ", count, count_pre)
        test.test(test_l, test_r, model_CNN)
        test.metrics_evaluate(test_l, test_r, model_CNN)
max_length=CLASSIFIER_MAX_LENGTH) pos_weight = get_positive_weight(train_labels) # For perfect shuffling, a buffer size greater than or equal to the full size of the dataset is required. train_dataset = train_dataset.shuffle( buffer_size=len(train_labels), reshuffle_each_iteration=True).batch( CLASSIFIER_BATCH_SIZE).repeat(n_epochs) valid_dataset = valid_dataset.batch(CLASSIFIER_BATCH_SIZE) # Prepare training: Compile tf.keras model with optimizer, loss and learning rate schedule # optimizer = RAdam(learning_rate=3e-5, decay=weight_decay, epsilon=1e-08, clipnorm=1.0) # https://github.com/CyberZHG/keras-radam/blob/master/keras_radam/training.py optimizer = RAdamOptimizer(learning_rate=3e-5, epsilon=1e-08, total_steps=n_epochs * len(train_labels) / CLASSIFIER_BATCH_SIZE, warmup_proportion=0.1) # can be enabled if Volta GPU or later # optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(optimizer) def weighted_binary_crossentropy(weights): def w_binary_crossentropy(y_true, y_pred): return tf.keras.backend.mean( tf.nn.weighted_cross_entropy_with_logits(labels=tf.cast( y_true, tf.float32), logits=y_pred, pos_weight=weights, name=None),
def test_training_amsgrad(self):
    """Training smoke test for RAdam with AMSGrad enabled (tf.keras only)."""
    if not TF_KERAS:
        return
    from keras_radam.training import RAdamOptimizer
    amsgrad_optimizer = RAdamOptimizer(amsgrad=True)
    self._test_fit(amsgrad_optimizer)
def arrival_model(inp_layer, inp_embed, link_size, cross_size, slice_size,
                  input_deep_col, input_wide_col, link_nf_size, cross_nf_size,
                  link_seqlen=170, cross_seqlen=12, pred_len=1, dropout=0.25,
                  sp_dropout=0.1, embed_dim=64, hidden_dim=128, n_layers=3,
                  lr=0.001, kernel_size1=3, kernel_size2=2, conv_size=128,
                  conv=False):
    """Build and compile the multi-branch arrival-time prediction model.

    Combines four feature branches into five linear regression heads
    ('arrival_0'..'arrival_4'), compiled with MSE loss and RAdam:

    * link branch: embedded link-id + numeric link features -> stacked GRUs;
    * cross branch: embedded cross-id + numeric cross features -> stacked GRUs;
    * deep branch: BatchNorm/Dense/swish MLP over ``deep_inputs``;
    * DCN branch: ``CrossLayer`` over the concatenated ``inp_embed`` tensors;
    * plus raw ``wide_inputs`` and a scalar slice-id embedding.

    Args:
        inp_layer: list of input tensors passed straight into the final
            ``tf.keras.Model`` inputs (built by the caller with ``inp_embed``).
        inp_embed: list of embedding tensors, concatenated for the DCN branch.
        link_size / cross_size / slice_size: vocabulary sizes for the
            link-id, cross-id and slice-id embeddings.
        input_deep_col / input_wide_col: widths of the deep and wide inputs.
        link_nf_size / cross_nf_size: per-timestep feature counts; column 0
            is assumed to be the categorical id, the rest numeric —
            TODO confirm against the feature builder.
        link_seqlen / cross_seqlen: sequence lengths of the two branches.
        pred_len: number of leading timesteps kept when ``conv`` is False.
        dropout / sp_dropout: dense and spatial (sequence) dropout rates.
        embed_dim / hidden_dim / n_layers: link embedding width, GRU width,
            and GRU stack depth.
        lr: unused here; compile uses a hard-coded 1e-3 RAdam learning rate.
        kernel_size1 / kernel_size2 / conv_size: Conv1D head parameters
            (``kernel_size2`` only used by commented-out code).
        conv: if True, pool Conv1D outputs instead of truncating GRU output.

    Returns:
        A compiled ``tf.keras.Model`` with five scalar outputs.
    """
    # Concatenated caller-provided embeddings feed the DCN branch.
    inp = L.concatenate(inp_embed, axis=-1)
    link_inputs = L.Input(shape=(link_seqlen, link_nf_size),
                          name='link_inputs')
    cross_inputs = L.Input(shape=(cross_seqlen, cross_nf_size),
                           name='cross_inputs')
    deep_inputs = L.Input(shape=(input_deep_col, ), name='deep_input')
    slice_input = L.Input(shape=(1, ), name='slice_input')
    wide_inputs = keras.layers.Input(shape=(input_wide_col, ),
                                     name='wide_inputs')
    # link----------------------------
    # Column 0 of each timestep is the link id; embed it, then flatten the
    # (timestep, 1, embed_dim) embedding back to (timestep, embed_dim).
    categorical_link = link_inputs[:, :, :1]
    embed_link = L.Embedding(input_dim=link_size,
                             output_dim=embed_dim,
                             mask_zero=True)(categorical_link)
    reshaped_link = tf.reshape(
        embed_link,
        shape=(-1, embed_link.shape[1],
               embed_link.shape[2] * embed_link.shape[3]))
    reshaped_link = L.SpatialDropout1D(sp_dropout)(reshaped_link)
    """
    categorical_slice = link_inputs[:, :, 5:6]
    embed_slice = L.Embedding(input_dim=289, output_dim=16, mask_zero=True)(categorical_slice)
    reshaped_slice = tf.reshape(embed_slice, shape=(-1, embed_slice.shape[1], embed_slice.shape[2] * embed_slice.shape[3]))
    reshaped_slice = L.SpatialDropout1D(sp_dropout)(reshaped_slice)
    categorical_hightemp = link_inputs[:, :, 6:7]
    embed_hightemp = L.Embedding(input_dim=33, output_dim=8, mask_zero=True)(categorical_hightemp)
    reshaped_hightemp = tf.reshape(embed_hightemp, shape=(-1, embed_hightemp.shape[1], embed_hightemp.shape[2] * embed_hightemp.shape[3]))
    reshaped_hightemp = L.SpatialDropout1D(sp_dropout)(reshaped_hightemp)
    categorical_weather = link_inputs[:, :, 7:8]
    embed_weather = L.Embedding(input_dim=7, output_dim=8, mask_zero=True)(categorical_weather)
    reshaped_weather = tf.reshape(embed_weather, shape=(-1, embed_weather.shape[1], embed_weather.shape[2] * embed_weather.shape[3]))
    reshaped_weather = L.SpatialDropout1D(sp_dropout)(reshaped_weather)
    numerical_fea1 = link_inputs[:, :, 1:5]
    numerical_fea1 = L.Masking(mask_value=0, name='numerical_fea1')(numerical_fea1)
    hidden = L.concatenate([reshaped_link, numerical_fea1, reshaped_slice, reshaped_hightemp, reshaped_weather], axis=2)
    """
    # Remaining columns are numeric features; mask zero-padded timesteps.
    numerical_fea1 = link_inputs[:, :, 1:]
    numerical_fea1 = L.Masking(mask_value=0,
                               name='numerical_fea1')(numerical_fea1)
    hidden = L.concatenate([reshaped_link, numerical_fea1], axis=2)
    #hidden = L.Masking(mask_value=0)(hidden)
    # Stack of GRU layers over the link sequence.
    for x in range(n_layers):
        hidden = gru_layer(hidden_dim, dropout)(hidden)
    if conv:
        # Conv1D head: average- and max-pool the convolved sequence.
        x_conv1 = Conv1D(conv_size,
                         kernel_size=kernel_size1,
                         padding='valid',
                         kernel_initializer='he_uniform')(hidden)
        avg_pool1_gru = GlobalAveragePooling1D()(x_conv1)
        max_pool1_gru = GlobalMaxPooling1D()(x_conv1)
        #x_conv2 = Conv1D(conv_size, kernel_size=kernel_size2, padding='valid', kernel_initializer='he_uniform')(hidden)
        #avg_pool2_gru = GlobalAveragePooling1D()(x_conv2)
        #max_pool2_gru = GlobalMaxPooling1D()(x_conv2)
        truncated_link = concatenate([avg_pool1_gru, max_pool1_gru])
    else:
        # Keep only the first `pred_len` timesteps of the GRU output.
        truncated_link = hidden[:, :pred_len]
        truncated_link = L.Flatten()(truncated_link)
    # truncated_link = Attention(256)(hidden)
    # CROSS----------------------------
    # Same pattern as the link branch, with a fixed 16-dim id embedding.
    categorical_fea2 = cross_inputs[:, :, :1]
    embed2 = L.Embedding(input_dim=cross_size, output_dim=16,
                         mask_zero=True)(categorical_fea2)
    reshaped2 = tf.reshape(embed2,
                           shape=(-1, embed2.shape[1],
                                  embed2.shape[2] * embed2.shape[3]))
    reshaped2 = L.SpatialDropout1D(sp_dropout)(reshaped2)
    numerical_fea2 = cross_inputs[:, :, 1:]
    numerical_fea2 = L.Masking(mask_value=0,
                               name='numerical_fea2')(numerical_fea2)
    hidden2 = L.concatenate([reshaped2, numerical_fea2], axis=2)
    # hidden2 = L.Masking(mask_value=0)(hidden2)
    for x in range(n_layers):
        hidden2 = gru_layer(hidden_dim, dropout)(hidden2)
    if conv:
        x_conv3 = Conv1D(conv_size,
                         kernel_size=kernel_size1,
                         padding='valid',
                         kernel_initializer='he_uniform')(hidden2)
        avg_pool3_gru = GlobalAveragePooling1D()(x_conv3)
        max_pool3_gru = GlobalMaxPooling1D()(x_conv3)
        #x_conv4 = Conv1D(conv_size, kernel_size=kernel_size2, padding='valid', kernel_initializer='he_uniform')(hidden2)
        #avg_pool4_gru = GlobalAveragePooling1D()(x_conv4)
        #max_pool4_gru = GlobalMaxPooling1D()(x_conv4)
        truncated_cross = concatenate([avg_pool3_gru, max_pool3_gru])
    else:
        truncated_cross = hidden2[:, :pred_len]
        truncated_cross = L.Flatten()(truncated_cross)
    # truncated_cross = Attention(256)(hidden2)
    # SLICE----------------------------
    # Scalar slice id -> 1-dim embedding.
    embed_slice = L.Embedding(input_dim=slice_size,
                              output_dim=1)(slice_input)
    embed_slice = L.Flatten()(embed_slice)
    # DEEP_INPUS
    # MLP over the dense "deep" features: 3 x (Dense -> BN -> swish -> Dropout).
    x = L.BatchNormalization()(deep_inputs)
    x = L.Dropout(0.25)(x)
    for i in range(3):
        x = L.Dense(256)(x)
        x = L.BatchNormalization()(x)
        x = L.Lambda(tf.keras.activations.swish)(x)
        x = L.Dropout(0.25)(x)
    dense_hidden3 = L.Dense(64, activation='linear')(x)
    # DCN
    cross = CrossLayer(output_dim=inp.shape[2], num_layer=8,
                       name="cross_layer")(inp)
    # Fuse all branches, then a 512->256 MLP trunk with BN and dropout.
    truncated = L.concatenate([
        truncated_link, truncated_cross, cross, dense_hidden3, wide_inputs,
        embed_slice
    ])
    truncated = L.BatchNormalization()(truncated)
    truncated = L.Dropout(dropout)(L.Dense(512, activation='relu')(truncated))
    truncated = L.BatchNormalization()(truncated)
    truncated = L.Dropout(dropout)(L.Dense(256, activation='relu')(truncated))
    # Five independent scalar regression heads.
    arrival_0 = L.Dense(1, activation='linear', name='arrival_0')(truncated)
    arrival_1 = L.Dense(1, activation='linear', name='arrival_1')(truncated)
    arrival_2 = L.Dense(1, activation='linear', name='arrival_2')(truncated)
    arrival_3 = L.Dense(1, activation='linear', name='arrival_3')(truncated)
    arrival_4 = L.Dense(1, activation='linear', name='arrival_4')(truncated)
    model = tf.keras.Model(
        inputs=[
            inp_layer, link_inputs, cross_inputs, deep_inputs, wide_inputs,
            slice_input
        ],
        outputs=[arrival_0, arrival_1, arrival_2, arrival_3, arrival_4])
    print(model.summary())
    # NOTE(review): learning rate is hard-coded to 1e-3 here; the `lr`
    # parameter is unused — confirm which is intended.
    model.compile(
        loss='mse',
        optimizer=RAdamOptimizer(
            learning_rate=1e-3
        )  # 'adam' RAdam(warmup_proportion=0.1, min_lr=1e-7)
    )
    return model
def test_training_decay(self):
    """Smoke-test fitting a model with RAdam weight decay enabled.

    Runs only under the tf.keras backend (``TF_KERAS``), where
    ``RAdamOptimizer`` is available.
    """
    if TF_KERAS:
        from keras_radam.training import RAdamOptimizer
        optimizer = RAdamOptimizer(weight_decay=1e-8)
        self._test_fit(optimizer)
def get_optimizer(cost, global_step, batch_steps_per_epoch, kwargs=None):
    """Create a training op and learning-rate node for the configured optimizer.

    Args:
        cost: scalar loss tensor to minimize.
        global_step: global-step variable driving the exponential-decay
            schedules.
        batch_steps_per_epoch: optimizer steps per epoch; converts
            ``decay_epochs`` into ``decay_steps``.
        kwargs: optional config dict. Recognized keys: ``"optimizer"``
            ("momentum", "rmsprop", "adabound", "radam"; anything else
            falls through to Adam), ``"learning_rate"``, ``"decay_rate"``
            (default 0.985), ``"decay_epochs"`` (default 1), ``"momentum"``
            (default 0.9).

    Returns:
        Tuple ``(train_op, learning_rate_node)``.
    """
    # Use a None sentinel instead of the original mutable `kwargs={}` default.
    if kwargs is None:
        kwargs = {}
    optimizer_name = kwargs.get("optimizer", None)
    learning_rate = kwargs.get("learning_rate", None)
    decay_rate = kwargs.get("decay_rate", 0.985)
    decay_epochs = kwargs.get("decay_epochs", 1)
    decay_steps = decay_epochs * batch_steps_per_epoch
    with tf.variable_scope('optimizer', reuse=tf.AUTO_REUSE):
        # BUG FIX: the original compared strings with `is`, which tests
        # object identity, only works by accident of CPython interning, and
        # raises SyntaxWarning on Python >= 3.8. Use `==` for equality.
        if optimizer_name == "momentum":
            momentum = kwargs.get("momentum", 0.9)
            learning_rate_node = tf.train.exponential_decay(
                learning_rate=learning_rate,
                global_step=global_step,
                decay_rate=decay_rate,
                decay_steps=decay_steps,
                staircase=True,
            )
            optimizer = tf.train.MomentumOptimizer(
                learning_rate=learning_rate_node,
                momentum=momentum,
            )
        elif optimizer_name == "rmsprop":
            learning_rate_node = tf.train.exponential_decay(
                learning_rate=learning_rate,
                global_step=global_step,
                decay_steps=decay_steps,
                decay_rate=decay_rate,
                staircase=True)
            optimizer = tf.train.RMSPropOptimizer(
                learning_rate=learning_rate_node)
        elif optimizer_name == "adabound":
            from .optimization.adabound import AdaBoundOptimizer
            learning_rate_node = tf.train.exponential_decay(
                learning_rate=learning_rate,
                global_step=global_step,
                decay_steps=decay_steps,
                decay_rate=decay_rate,
                staircase=True)
            # NOTE(review): AdaBound receives the constant `learning_rate`,
            # not the decayed node — confirm this is intentional.
            optimizer = AdaBoundOptimizer(learning_rate=learning_rate)
        elif optimizer_name == "radam":
            from keras_radam.training import RAdamOptimizer
            learning_rate_node = tf.train.exponential_decay(
                learning_rate=learning_rate,
                global_step=global_step,
                decay_steps=decay_steps,
                decay_rate=decay_rate,
                staircase=True,
                name="exp_de")
            optimizer = RAdamOptimizer(learning_rate=learning_rate_node)
        else:
            if learning_rate is not None:
                optimizer = tf.train.AdamOptimizer(learning_rate)
                learning_rate_node = tf.constant(0.0)
            else:
                # NOTE(review): this branch feeds learning_rate=None into
                # exponential_decay and will fail at graph-build time;
                # preserved from the original — confirm intended fallback.
                learning_rate_node = tf.train.exponential_decay(
                    learning_rate=learning_rate,
                    global_step=global_step,
                    decay_steps=decay_steps,
                    decay_rate=decay_rate,
                    staircase=True)
                optimizer = tf.train.AdamOptimizer(
                    learning_rate=learning_rate_node)
        # Only AdaBound's minimize() advances global_step here; the decay
        # schedules above rely on global_step being incremented elsewhere.
        if optimizer_name == "adabound":
            optimizer = optimizer.minimize(cost, global_step=global_step)
        else:
            optimizer = optimizer.minimize(cost)
    return optimizer, learning_rate_node
def fit(self,
        train_dataset,
        test_dataset,
        instance_names=None,
        epochs=10,
        learning_rate=1e-3,
        random_latent=None,
        recoding_dir='./recoding',
        gray_plot=True,
        generate_epoch=5,
        save_epoch=5,
        metric_epoch=10,
        gt_epoch=10,
        gt_data=None):
    """Train the model, evaluating on ``test_dataset`` after every epoch.

    Per epoch: one training pass, one evaluation pass, then (on their
    respective schedules) quantitative metrics, ground-truth
    disentanglement metrics, sample generation, checkpointing, and early
    stopping on the test 'Total' loss.

    Args:
        train_dataset / test_dataset: iterables of dict-like batches keyed
            by ``instance_names``.
        instance_names: batch keys; defaults to ``['image']``.
        epochs: number of additional epochs (resumes from the CSV log).
        learning_rate: RAdam learning rate.
        random_latent: fixed latent noise for sample generation; drawn once
            from N(0, 1) when None so samples are comparable across epochs.
        recoding_dir: root directory for logs/images/checkpoints.
        gray_plot: plot generated samples in grayscale.
        generate_epoch / save_epoch / metric_epoch / gt_epoch: period (in
            epochs) of each auxiliary task; ``generate_epoch`` and
            ``metric_epoch`` may be None to disable.
        gt_data: ground-truth dataset for disentanglement metrics; None
            disables that pass.
    """
    # None sentinel instead of the original mutable `['image']` default.
    if instance_names is None:
        instance_names = ['image']
    assert isinstance(train_dataset, Iterable), 'dataset must be iterable'
    assert isinstance(test_dataset, Iterable), 'dataset must be iterable'
    self.dir_setup(recoding_dir)
    # generate random latent
    latent_shape = [50, self.latent_dim]
    if random_latent is None:
        random_latent = tf.random.normal(shape=latent_shape)
    if generate_epoch:
        generated = self.generate_sample(model=self.get_varibale,
                                         inputs_shape=self.inputs_shape,
                                         latent_shape=latent_shape,
                                         eps=random_latent)
        plot_and_save_generated(generated=generated,
                                epoch=0,
                                path=self.image_gen_dir,
                                gray=gray_plot)
    self.optimizer = RAdamOptimizer(learning_rate)
    file_Name = os.path.join(self.csv_log_dir,
                             'TRAIN_' + self.model_name + '.csv')
    # Resume epoch numbering from any previous run recorded in the CSV log.
    start_epoch = inspect_log(file_Name)
    early_stopper = EarlyStopping(name='on-Test dataset ELBO monitor',
                                  patience=5,
                                  min_delta=1e-6)
    # FIX: initialize the monitored loss so an empty test_dataset does not
    # raise NameError at the early-stopping check (was `montiored_loss`,
    # assigned only inside the test loop).
    monitored_loss = float('inf')
    sep = "=================================================================="
    epochs_pbar = tqdm(iterable=range(start_epoch, start_epoch + epochs),
                       position=0,
                       desc='Epochs Progress')
    for epoch in epochs_pbar:
        # ---- training pass ----
        tr_start_time = time.time()
        loss_tr = defaultdict()
        loss_tr['Epoch'] = epoch
        log_message('Training ... ', logging.INFO)
        for i, data_train in enumerate(train_dataset):
            data_train = self.cast_batch(data_train)
            self.train_step(input=data_train, names=instance_names)
            tr_losses = self.evaluate_step(input=data_train,
                                           names=instance_names)
            loss_tr = self.reduce_sum_dict(tr_losses, loss_tr)
            epochs_pbar.set_description(
                'Epochs Progress, Training Iterations {}'.format(i))
        tr_end_time = time.time()
        loss_tr['Elapsed'] = '{:06f}'.format(tr_end_time - tr_start_time)
        # ---- evaluation pass on the test dataset ----
        val_start_time = time.time()
        loss_val = defaultdict()
        loss_val['Epoch'] = epoch
        log_message('Testing ... ', logging.INFO)
        tbar = tqdm(iterable=range(100), position=0, desc='Testing ...')
        for i, data_test in enumerate(test_dataset):
            data_test = self.cast_batch(data_test)
            val_losses = self.evaluate_step(input=data_test,
                                            names=instance_names)
            loss_val = self.reduce_sum_dict(val_losses, loss_val)
            monitored_loss = loss_val['Total']
            tbar.update(i % 100)
        val_end_time = time.time()
        loss_val['Elapsed'] = '{:06f}'.format(val_end_time - val_start_time)
        # ---- quantitative metrics every `metric_epoch` epochs ----
        metrics_computed = (metric_epoch is not None
                            and epoch % metric_epoch == 0)
        if metrics_computed:
            met_start_time = time.time()
            met_values = defaultdict()
            met_values['Epoch'] = epoch
            log_message('Evaluating Mertics ... ', logging.INFO)
            tbar = tqdm(iterable=range(100), position=0,
                        desc='Evaluating ...')
            for i, data_test in enumerate(test_dataset):
                data_test = self.cast_batch(data_test)
                inputs = {
                    'X': data_test[instance_names[0]],
                    'y': self.feedforward(data_test[instance_names[0]])
                }
                met_computed = compute_metrics(inputs)
                met_values = self.reduce_sum_dict(met_computed, met_values)
                tbar.update(i % 100)
            met_end_time = time.time()
            met_values['Elapsed'] = '{:06f}'.format(met_end_time -
                                                    met_start_time)
        # ---- ground-truth disentanglement metrics ----
        gt_computed = epoch % gt_epoch == 0 and gt_data is not None
        if gt_computed:
            log_message('Evaluating ground truth data ... ', logging.INFO)
            tbar = tqdm(iterable=range(100), position=0,
                        desc='gt Evaluating ...')

            def rep_func(x):
                return self.feedforward(x)['latent']

            us_scores = compute_unsupervised_metrics(
                ground_truth_data=gt_data,
                representation_function=rep_func,
                random_state=np.random.RandomState(0),
                num_train=10000,
                batch_size=32)
            s_scores = compute_supervised_metrics(
                ground_truth_data=gt_data,
                representation_function=rep_func,
                random_state=np.random.RandomState(0),
                num_train=10000,
                num_test=2000,
                continuous_factors=False,
                batch_size=32)
        #############################
        display.clear_output(wait=False)
        log_message(sep, logging.INFO)
        file_Name = os.path.join(self.csv_log_dir,
                                 'TRAIN_' + self.model_name)
        log(file_name=file_Name, message=dict(loss_tr), printed=True)
        log_message(sep, logging.INFO)
        log_message(sep, logging.INFO)
        file_Name = os.path.join(self.csv_log_dir, 'TEST_' + self.model_name)
        log(file_name=file_Name, message=dict(loss_val), printed=True)
        log_message(sep, logging.INFO)
        # BUG FIX: the original guarded this with `if epoch % metric_epoch:`,
        # which is true exactly on the epochs where `met_values` was NOT
        # computed (NameError / stale values). Log on the computing epochs.
        if metrics_computed:
            log_message(sep, logging.INFO)
            file_Name = os.path.join(self.csv_log_dir,
                                     'Metrics_' + self.model_name)
            log(file_name=file_Name, message=dict(met_values), printed=True)
            log_message(sep, logging.INFO)
        # BUG FIX: same inverted-condition bug for the ground-truth scores
        # (`if epoch % gt_epoch and ...` referenced s_scores/us_scores that
        # were only computed when `epoch % gt_epoch == 0`).
        if gt_computed:
            gt_metrics = {**s_scores, **us_scores}
            log_message(sep, logging.INFO)
            file_Name = os.path.join(self.csv_log_dir,
                                     'GroundTMetrics_' + self.model_name)
            log(file_name=file_Name, message=dict(gt_metrics), printed=True)
            log_message(sep, logging.INFO)
        if generate_epoch is not None and epoch % generate_epoch == 0:
            generated = self.generate_sample(model=self.get_varibale,
                                             inputs_shape=self.inputs_shape,
                                             latent_shape=latent_shape,
                                             eps=random_latent)
            plot_and_save_generated(generated=generated,
                                    epoch=epoch,
                                    path=self.image_gen_dir,
                                    gray=gray_plot,
                                    save=epoch % generate_epoch == 0)
        if epoch % save_epoch == 0:
            log_message('Saving Status in Epoch {}'.format(epoch),
                        logging.CRITICAL)
            self.save_status()
        # Early stopping on the monitored test loss.
        if early_stopper.stop(monitored_loss):
            log_message(
                'Aborting Training after {} epoch because no progress ... '
                .format(epoch), logging.WARN)
            break
def main(opt):
    """Train (``opt.task == 1``) or export code vectors (``opt.task == 0``).

    Builds a ``CorderModel`` from the vocabularies referenced by ``opt``,
    then either runs the RAdam training loop with periodic checkpointing,
    or runs the validation set through the model and appends each sample's
    code vector to ``analysis/rosetta_sampled_softmax_train.csv``.

    Args:
        opt: options namespace; mutated in place (``model_path`` is extended,
            vocab lookups are attached).
    """
    opt.model_path = os.path.join(opt.model_path, form_model_path(opt))
    checkfile = os.path.join(opt.model_path, 'cnn_tree.ckpt')
    ckpt = tf.train.get_checkpoint_state(opt.model_path)
    print("The model path : " + str(checkfile))
    print("Loss : " + str(opt.loss))
    if ckpt and ckpt.model_checkpoint_path:
        print("Continue training with old model : " + str(checkfile))
    print("Loading vocabs.........")
    node_type_lookup, node_token_lookup, subtree_lookup = load_vocabs(opt)
    # Attach vocab lookups so downstream components read them from opt.
    opt.node_type_lookup = node_type_lookup
    opt.node_token_lookup = node_token_lookup
    opt.subtree_lookup = subtree_lookup
    if opt.task == 1:
        train_dataset = CodeClassificationData(opt, True, False, False)
    if opt.task == 0:
        val_opt = copy.deepcopy(opt)
        val_opt.node_token_lookup = node_token_lookup
        validation_dataset = CodeClassificationData(val_opt, False, False,
                                                    True)
    print("Initializing tree caps model...........")
    corder = CorderModel(opt)
    print("Finished initializing corder model...........")
    loss_node = corder.loss
    optimizer = RAdamOptimizer(opt.lr)
    # Run any pending update ops (e.g. batch-norm stats) before each step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        training_point = optimizer.minimize(loss_node)
    saver = tf.train.Saver(save_relative_paths=True, max_to_keep=5)
    init = tf.global_variables_initializer()
    # best_f1_score = get_best_f1_score(opt)
    # print("Best f1 score : " + str(best_f1_score))
    with tf.Session() as sess:
        sess.run(init)
        if ckpt and ckpt.model_checkpoint_path:
            # Resume from the latest checkpoint when one exists.
            print("Continue training with old model")
            print("Checkpoint path : " + str(ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)
            for i, var in enumerate(saver._var_list):
                print('Var {}: {}'.format(i, var))
        if opt.task == 1:
            # ---- training loop ----
            for epoch in range(1, opt.epochs + 1):
                train_batch_iterator = ThreadedIterator(
                    train_dataset.make_minibatch_iterator(),
                    max_queue_size=opt.worker)
                train_accs = []
                for train_step, train_batch_data in enumerate(
                        train_batch_iterator):
                    print("--------------------------")
                    # print(train_batch_data["batch_subtrees_ids"])
                    logging.info(str(train_batch_data["batch_subtree_id"]))
                    _, err = sess.run(
                        [training_point, corder.loss],
                        feed_dict={
                            corder.placeholders["node_types"]:
                            train_batch_data["batch_node_types"],
                            corder.placeholders["node_tokens"]:
                            train_batch_data["batch_node_tokens"],
                            corder.placeholders["children_indices"]:
                            train_batch_data["batch_children_indices"],
                            corder.placeholders["children_node_types"]:
                            train_batch_data["batch_children_node_types"],
                            corder.placeholders["children_node_tokens"]:
                            train_batch_data["batch_children_node_tokens"],
                            corder.placeholders["labels"]:
                            train_batch_data["batch_subtree_id"],
                            corder.placeholders["dropout_rate"]: 0.3
                        })
                    logging.info("Training at epoch " + str(epoch) +
                                 " and step " + str(train_step) +
                                 " with loss " + str(err))
                    print("Epoch:", epoch, "Step:", train_step,
                          "Training loss:", err)
                    # Periodic checkpoint (skip step 0).
                    if train_step % opt.checkpoint_every == 0 and train_step > 0:
                        saver.save(sess, checkfile)
                        print('Checkpoint saved, epoch:' + str(epoch) +
                              ', step: ' + str(train_step) + ', loss: ' +
                              str(err) + '.')
        if opt.task == 0:
            # ---- export loop: dump code vectors to a CSV for analysis ----
            validation_batch_iterator = ThreadedIterator(
                validation_dataset.make_minibatch_iterator(),
                max_queue_size=opt.worker)
            for val_step, val_batch_data in enumerate(
                    validation_batch_iterator):
                scores = sess.run(
                    [corder.code_vector],
                    feed_dict={
                        corder.placeholders["node_types"]:
                        val_batch_data["batch_node_types"],
                        corder.placeholders["node_tokens"]:
                        val_batch_data["batch_node_tokens"],
                        corder.placeholders["children_indices"]:
                        val_batch_data["batch_children_indices"],
                        corder.placeholders["children_node_types"]:
                        val_batch_data["batch_children_node_types"],
                        corder.placeholders["children_node_tokens"]:
                        val_batch_data["batch_children_node_tokens"],
                        corder.placeholders["dropout_rate"]: 0.0
                    })
                # One CSV row per sample: "<file_path>,<v0 v1 v2 ...>".
                for i, vector in enumerate(scores[0]):
                    file_name = "analysis/rosetta_sampled_softmax_train.csv"
                    with open(file_name, "a") as f:
                        vector_score = []
                        for score in vector:
                            vector_score.append(str(score))
                        # print(val_batch_data["batch_file_path"])
                        line = str(val_batch_data["batch_file_path"]
                                   [i]) + "," + " ".join(vector_score)
                        f.write(line)
                        f.write("\n")
def main(train_opt, test_opt):
    """Train a TBCNN classifier, validating periodically and keeping the best-F1 checkpoint.

    Every ``train_opt.checkpoint_every`` training steps the full test set is
    evaluated; the checkpoint is saved only when micro-averaged F1 improves
    on ``test_opt.best_f1``.

    Args:
        train_opt: training options namespace; ``model_path`` is extended in
            place with the derived model directory.
        test_opt: evaluation options namespace (batching, paths, best_f1).
    """
    train_opt.model_path = os.path.join(
        train_opt.model_path,
        util_functions.form_tbcnn_model_path(train_opt))
    checkfile = os.path.join(train_opt.model_path, 'cnn_tree.ckpt')
    ckpt = tf.train.get_checkpoint_state(train_opt.model_path)
    print("The model path : " + str(checkfile))
    if ckpt and ckpt.model_checkpoint_path:
        print("-------Continue training with old model-------- : " +
              str(checkfile))
    tbcnn_model = TBCNN(train_opt)
    tbcnn_model.feed_forward()
    train_data_loader = BaseDataLoader(train_opt.batch_size,
                                       train_opt.label_size,
                                       train_opt.tree_size_threshold_upper,
                                       train_opt.tree_size_threshold_lower,
                                       train_opt.train_path, True)
    test_data_loader = BaseDataLoader(test_opt.batch_size,
                                      test_opt.label_size,
                                      test_opt.tree_size_threshold_upper,
                                      test_opt.tree_size_threshold_lower,
                                      test_opt.test_path, False)
    optimizer = RAdamOptimizer(train_opt.lr)
    # Run pending update ops (e.g. batch-norm stats) before each train step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        training_point = optimizer.minimize(tbcnn_model.loss)
    saver = tf.train.Saver(save_relative_paths=True, max_to_keep=5)
    init = tf.global_variables_initializer()
    best_f1 = test_opt.best_f1
    with tf.Session() as sess:
        sess.run(init)
        if ckpt and ckpt.model_checkpoint_path:
            # Resume from the latest checkpoint when one exists.
            print("Continue training with old model")
            print("Checkpoint path : " + str(ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)
            for i, var in enumerate(saver._var_list):
                print('Var {}: {}'.format(i, var))
        for epoch in range(1, train_opt.epochs + 1):
            train_batch_iterator = ThreadedIterator(
                train_data_loader.make_minibatch_iterator(),
                max_queue_size=train_opt.worker)
            for train_step, train_batch_data in enumerate(
                    train_batch_iterator):
                print("***************")
                # print(train_batch_data["batch_node_index"].shape)
                # print(train_batch_data["batch_node_type_id"].shape)
                # print(train_batch_data["batch_node_sub_tokens_id"].shape)
                # print(train_batch_data["batch_children_index"].shape)
                # print(train_batch_data["batch_children_node_type_id"].shape)
                # print(train_batch_data["batch_children_node_sub_tokens_id"].shape)
                # print(train_batch_data["batch_labels_one_hot"])
                # print("Labels : " + str(train_batch_data["batch_labels"]))
                # print("Tree sizes : " + str(train_batch_data["batch_size"]))
                # for children_index in train_batch_data["batch_children_index"]:
                #     print("Children_index : " + str(len(children_index)))
                _, err = sess.run(
                    [training_point, tbcnn_model.loss],
                    feed_dict={
                        tbcnn_model.placeholders["node_type"]:
                        train_batch_data["batch_node_type_id"],
                        tbcnn_model.placeholders["node_token"]:
                        train_batch_data["batch_node_sub_tokens_id"],
                        tbcnn_model.placeholders["children_index"]:
                        train_batch_data["batch_children_index"],
                        tbcnn_model.placeholders["children_node_type"]:
                        train_batch_data["batch_children_node_type_id"],
                        tbcnn_model.placeholders["children_node_token"]:
                        train_batch_data["batch_children_node_sub_tokens_id"],
                        tbcnn_model.placeholders["labels"]:
                        train_batch_data["batch_labels_one_hot"],
                        tbcnn_model.placeholders["dropout_rate"]: 0.3
                    })
                print("Epoch:", epoch, "Step:", train_step, "Loss:", err,
                      "Best F1:", best_f1)
                # Periodic validation + best-F1 checkpointing (skip step 0).
                if train_step % train_opt.checkpoint_every == 0 and train_step > 0:
                    #Perform Validation
                    print("Perform validation.....")
                    correct_labels = []
                    predictions = []
                    test_batch_iterator = ThreadedIterator(
                        test_data_loader.make_minibatch_iterator(),
                        max_queue_size=test_opt.worker)
                    for test_step, test_batch_data in enumerate(
                            test_batch_iterator):
                        print("***************")
                        print(test_batch_data["batch_size"])
                        scores = sess.run(
                            [tbcnn_model.softmax],
                            feed_dict={
                                tbcnn_model.placeholders["node_type"]:
                                test_batch_data["batch_node_type_id"],
                                tbcnn_model.placeholders["node_token"]:
                                test_batch_data["batch_node_sub_tokens_id"],
                                tbcnn_model.placeholders["children_index"]:
                                test_batch_data["batch_children_index"],
                                tbcnn_model.placeholders["children_node_type"]:
                                test_batch_data["batch_children_node_type_id"],
                                tbcnn_model.placeholders["children_node_token"]:
                                test_batch_data[
                                    "batch_children_node_sub_tokens_id"],
                                tbcnn_model.placeholders["labels"]:
                                test_batch_data["batch_labels_one_hot"],
                                tbcnn_model.placeholders["dropout_rate"]: 0.0
                            })
                        batch_correct_labels = list(
                            np.argmax(test_batch_data["batch_labels_one_hot"],
                                      axis=1))
                        batch_predictions = list(np.argmax(scores[0], axis=1))
                        print(batch_correct_labels)
                        print(batch_predictions)
                        correct_labels.extend(
                            np.argmax(test_batch_data["batch_labels_one_hot"],
                                      axis=1))
                        predictions.extend(np.argmax(scores[0], axis=1))
                    print(correct_labels)
                    print(predictions)
                    # Micro-averaged F1 over the full test set.
                    f1 = float(
                        f1_score(correct_labels, predictions,
                                 average="micro"))
                    print(classification_report(correct_labels, predictions))
                    print('F1:', f1)
                    print('Best F1:', best_f1)
                    # print(confusion_matrix(correct_labels, predictions))
                    if f1 > best_f1:
                        best_f1 = f1
                        saver.save(sess, checkfile)
                        print('Checkpoint saved, epoch:' + str(epoch) +
                              ', step: ' + str(train_step) + ', loss: ' +
                              str(err) + '.')