def adversarial_training_WGAN(arguments, train, data_cols, label_cols=None,
                              seed=0, starting_step=0):
    """Build the Wasserstein GAN training graph, warm-start it, and train.

    The generator, critic ("discriminator") and combined models come from
    ``define_models_GAN`` (or ``define_models_CGAN`` when ``label_cols`` is
    non-empty). A TensorFlow graph for the critic loss, including a gradient
    penalty term, is then assembled. Saved losses / weights are loaded when
    the corresponding paths are truthy, the critic is pre-trained when no
    critic weights were loaded, and finally ``training_steps_WGAN`` is run.

    Args:
        arguments: sequence of exactly 14 values, unpacked in this order:
            rand_dim, nb_steps, batch_size, k_d, k_g,
            critic_pre_train_steps, log_interval, learning_rate,
            base_n_count, data_dir, generator_model_path,
            discriminator_model_path, loss_pickle_path, show.
        train: training data; only forwarded to the training helpers.
        data_cols: data column names; ``len(data_cols)`` is the data
            dimension.
        label_cols: label column names. ``None`` or an empty sequence
            selects the unconditional model.
        seed: value passed to ``np.random.seed``.
        starting_step: forwarded to the training helpers.

    Returns:
        None.
    """
    [rand_dim, nb_steps, batch_size, k_d, k_g, critic_pre_train_steps,
     log_interval, learning_rate, base_n_count, data_dir,
     generator_model_path, discriminator_model_path, loss_pickle_path,
     show] = arguments

    # A fresh list per call: a list literal as the default would be shared
    # between calls (and with the helpers this list is handed to).
    if label_cols is None:
        label_cols = []

    np.random.seed(seed)  # set random seed

    data_dim = len(data_cols)
    print('data_dim: ', data_dim)
    print('data_cols: ', data_cols)

    label_dim = 0
    with_class = False
    if len(label_cols) > 0:
        with_class = True
        label_dim = len(label_cols)
        print('label_dim: ', label_dim)
        print('label_cols: ', label_cols)

    # define network models
    K.set_learning_phase(1)  # 1 = train

    if with_class:
        cache_prefix = 'WCGAN'
        generator_model, discriminator_model, combined_model = define_models_CGAN(
            rand_dim, data_dim, label_dim, base_n_count, type='Wasserstein')
    else:
        cache_prefix = 'WGAN'
        generator_model, discriminator_model, combined_model = define_models_GAN(
            rand_dim, data_dim, base_n_count, type='Wasserstein')

    # construct computation graph for calculating the gradient penalty
    # (improved WGAN) and training the discriminator
    _z = tf.placeholder(tf.float32, shape=(batch_size, rand_dim))

    _labels = None
    if with_class:
        _x = tf.placeholder(tf.float32, shape=(batch_size, data_dim + label_dim))
        _labels = tf.placeholder(tf.float32, shape=(batch_size, label_dim))
        _g_z = generator_model(inputs=[_z, _labels])
    else:
        _x = tf.placeholder(tf.float32, shape=(batch_size, data_dim))
        _g_z = generator_model(_z)

    epsilon = tf.placeholder(tf.float32, shape=(batch_size, 1))

    # x_hat is an epsilon-weighted mix of the real and generated batches.
    x_hat = epsilon * _x + (1.0 - epsilon) * _g_z
    gradients = tf.gradients(discriminator_model(x_hat), [x_hat])
    # NOTE(review): tf.norm is called without an axis, so the norm is taken
    # over the whole batch tensor rather than per sample -- confirm intended.
    _gradient_penalty = 10.0 * tf.square(tf.norm(gradients[0], ord=2) - 1.0)

    # calculate discriminator's loss
    _disc_loss_generated = em_loss(tf.ones(batch_size), discriminator_model(_g_z))
    _disc_loss_real = em_loss(tf.ones(batch_size), discriminator_model(_x))
    _disc_loss = _disc_loss_generated - _disc_loss_real + _gradient_penalty

    # update f by taking an SGD step on mini-batch loss LD(f)
    disc_optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate, beta1=0.5, beta2=0.9).minimize(
            _disc_loss, var_list=discriminator_model.trainable_weights)

    sess = K.get_session()

    # compile models
    adam = optimizers.Adam(lr=learning_rate, beta_1=0.5, beta_2=0.9)

    discriminator_model.trainable = False
    combined_model.compile(optimizer=adam, loss=[em_loss])

    combined_loss, disc_loss_generated, disc_loss_real, xgb_losses = [], [], [], []

    def _collect_components():
        """Return the positional list the training helpers unpack.

        Reads the enclosing variables at call time, so a call made after
        the loss histories are re-bound picks up the loaded lists.
        """
        return [
            cache_prefix, with_class, starting_step, train, data_cols,
            data_dim, label_cols, label_dim, generator_model,
            discriminator_model, combined_model, rand_dim, nb_steps,
            batch_size, k_d, k_g, critic_pre_train_steps, log_interval,
            learning_rate, base_n_count, data_dir, generator_model_path,
            discriminator_model_path, sess, _z, _x, _labels, _g_z, epsilon,
            x_hat, gradients, _gradient_penalty, _disc_loss_generated,
            _disc_loss_real, _disc_loss, disc_optimizer, show,
            combined_loss, disc_loss_generated, disc_loss_real, xgb_losses,
        ]

    # Built before any pickle is loaded: critic pre-training below sees the
    # initial (empty) loss histories.
    model_components = _collect_components()

    if show:
        print(generator_model.summary())
        print(discriminator_model.summary())
        print(combined_model.summary())

    if loss_pickle_path:
        print('Loading loss pickles')
        # NOTE: pickle can execute arbitrary code while loading; only point
        # loss_pickle_path at files this project wrote itself.
        with open(loss_pickle_path, 'rb') as loss_file:
            [combined_loss, disc_loss_generated, disc_loss_real,
             xgb_losses] = pickle.load(loss_file)
    if generator_model_path:
        print('Loading generator model')
        generator_model.load_weights(generator_model_path)
    if discriminator_model_path:
        print('Loading discriminator model')
        discriminator_model.load_weights(discriminator_model_path)
    else:
        print('pre-training the critic...')
        K.set_learning_phase(1)  # 1 = train
        loss = None  # stays None when critic_pre_train_steps is 0
        for i in range(critic_pre_train_steps):
            if i % 20 == 0:
                print('Step: {} of {} critic pre-training.'.format(
                    i, critic_pre_train_steps))
            loss = train_discriminator_step(model_components, seed=i)
        print('Last batch of critic pre-training disc_loss: {}.'.format(loss))

    # Rebuilt so that loss histories loaded from the pickle are included.
    model_components = _collect_components()

    training_steps_WGAN(model_components)
def get_model(framework, model_variant):
    """Load the desired EfficientPose model variant with the requested framework.

    Args:
        framework: string
            Deep learning framework to use. Accepted values are 'keras'/'k',
            'tensorflow'/'tf', 'tensorflowlite'/'tflite' and
            'pytorch'/'torch'.
        model_variant: string
            EfficientPose model to utilize. Must be one of the lower-case
            keys of the resolution table at the end of this function
            ('rt', 'i', 'ii', 'iii', 'iv', 'rt_lite', 'i_lite', 'ii_lite').

    Returns:
        Tuple ``(model, resolution)``. ``(False, False)`` is returned when
        the PyTorch model definition module cannot be loaded.

    Raises:
        ValueError: if ``framework`` is not one of the accepted values.
        KeyError: if ``model_variant`` is not in the resolution table.
    """
    # Keras
    if framework in ['keras', 'k']:
        from tensorflow.keras.backend import set_learning_phase
        from tensorflow.keras.models import load_model
        set_learning_phase(0)
        model = load_model(
            join('models', 'keras',
                 'EfficientPose{0}.h5'.format(model_variant.upper())),
            custom_objects={
                'BilinearWeights': helpers.keras_BilinearWeights,
                'Swish': helpers.Swish(helpers.eswish),
                'eswish': helpers.eswish,
                'swish1': helpers.swish1
            })

    # TensorFlow
    elif framework in ['tensorflow', 'tf']:
        from tensorflow.python.platform.gfile import FastGFile
        from tensorflow.compat.v1 import GraphDef
        from tensorflow.compat.v1.keras.backend import get_session
        from tensorflow import import_graph_def
        graph_def = GraphDef()
        # The context manager closes the file even if parsing raises.
        with FastGFile(
                join('models', 'tensorflow',
                     'EfficientPose{0}.pb'.format(model_variant.upper())),
                'rb') as f:
            graph_def.ParseFromString(f.read())
        model = get_session()
        # NOTE(review): the returned context manager is never entered, so
        # this call looks like a no-op -- confirm before removing.
        model.graph.as_default()
        import_graph_def(graph_def)

    # TensorFlow Lite
    elif framework in ['tensorflowlite', 'tflite']:
        from tensorflow import lite
        model = lite.Interpreter(model_path=join(
            'models', 'tflite',
            'EfficientPose{0}.tflite'.format(model_variant.upper())))
        model.allocate_tensors()

    # PyTorch
    elif framework in ['pytorch', 'torch']:
        # NOTE(review): `imp` was removed from the standard library in
        # Python 3.12; this branch needs an importlib port to run there.
        from imp import load_source
        from torch import load, quantization, backends
        try:
            # Loaded for its side effect of making the 'MainModel' module
            # available; the returned module object itself is not used.
            MainModel = load_source(
                'MainModel',
                join('models', 'pytorch',
                     'EfficientPose{0}.py'.format(model_variant.upper())))
        except Exception:
            print(
                '\n##########################################################################################################'
            )
            print(
                'Desired model "EfficientPose{0}Lite" not available in PyTorch. Please select among "RT", "I", "II", "III" or "IV".'
                .format(model_variant.split('lite')[0].upper()))
            print(
                '##########################################################################################################\n'
            )
            return False, False
        model = load(
            join('models', 'pytorch',
                 'EfficientPose{0}'.format(model_variant.upper())))
        model.eval()
        # NOTE(review): qconfig is never used afterwards; kept in case the
        # call is relied on for validation of the backend name.
        qconfig = quantization.get_default_qconfig('qnnpack')
        backends.quantized.engine = 'qnnpack'

    else:
        # Previously this fell through to an UnboundLocalError on `model`.
        raise ValueError(
            'Unknown framework "{0}". Please select among "keras", '
            '"tensorflow", "tflite" or "pytorch".'.format(framework))

    return model, {
        'rt': 224,
        'i': 256,
        'ii': 368,
        'iii': 480,
        'iv': 600,
        'rt_lite': 224,
        'i_lite': 256,
        'ii_lite': 368
    }[model_variant]
def calculate_losses_from_generator(tg, model, num_steps=None, stepsize=1, verbose=0):
    """Evaluate per-sample losses and predictions by stepping through a generator.

    Keras evaluate_generator only returns a scalar loss (mean) while
    predict_generator only returns the predictions but not the real labels.
    This function runs the model's output and loss tensors directly in the
    Keras session, one generator item at a time.

    The generator is temporarily reconfigured (batch size 1, no random
    batches, no shuffling); its attributes are restored from a snapshot on
    exit, including when an exception is raised.

    TODO Make it batch size independent

    Parameters
    ----------
    tg : object
        Data generator
    model : object
        Keras model
    num_steps : int, optional
        How many steps should be evaluated, by default None (uses len(tg))
    stepsize : int, optional
        Stride over generator items. 1 -> N samples evaluated,
        2 -> N/2 samples evaluated, etc..., by default 1
    verbose : int, optional
        Verbosity level; progress is printed when greater than 0

    Returns
    -------
    losses : array_like
        Loss between predicted and ground truth observation, one row per
        evaluated step and one entry per model output
    predictions : object
        Result of swap_listaxes applied to the per-step prediction lists
        (presumably grouped per output -- confirm against swap_listaxes)
    indices : array_like
        tg.cv_indices entries of the evaluated steps, important when taking
        stepsize unequal to 1
    """
    # X.) Parse inputs
    if num_steps is None:
        num_steps = len(tg)

    # 1.) Snapshot the generator state, then adjust attributes
    saved_attributes = tg.__dict__.copy()
    try:
        if tg.batch_size != 1:
            tg.batch_size = 1
        tg.random_batches = False
        tg.shuffle = False
        tg.sample_size = tg.model_timesteps * tg.batch_size

        # 2.) Get output tensors: one (prediction, loss) pair per output
        sess = K.get_session()
        (_, test_out) = tg.__getitem__(0)
        real_tensor, calc_tensors = K.placeholder(), []
        for output_index in range(len(test_out)):
            prediction_tensor = model.outputs[output_index]
            loss_tensor = model.loss_functions[output_index].fn(
                real_tensor, prediction_tensor)
            calc_tensors.append((prediction_tensor, loss_tensor))

        # 3.) Predict
        losses, predictions, indices = [], [], []
        for i in range(0, num_steps, stepsize):
            (in_tg, out_tg) = tg.__getitem__(i)
            indices.append(tg.cv_indices[i])
            loss, prediction = [], []
            for o, target in enumerate(out_tg):
                evaluated = sess.run(calc_tensors[o],
                                     feed_dict={
                                         model.input: in_tg,
                                         real_tensor: target
                                     })
                # Index 0 drops the (size-1) batch dimension
                prediction.append(evaluated[0][0, ...])
                loss.append(evaluated[1][0, ...])
            predictions.append(prediction)
            losses.append(loss)
            if verbose > 0 and not i % 50:
                print('{} / {}'.format(i, num_steps), end='\r')
        if verbose > 0:
            print('Performed {} gradient steps'.format(num_steps // stepsize))
        losses, predictions, indices = np.array(losses), swap_listaxes(
            predictions), np.array(indices)
    finally:
        # Restore the caller's generator even if evaluation failed midway.
        tg.__dict__.update(saved_attributes)
    return losses, predictions, indices
def test_adv(images, labels, batch_size, model, adv_model, daug_params,
             attack_params):
    """Measure a model's accuracy on adversarially perturbed images.

    An attack is initialised on `adv_model`, adversarial versions of each
    batch are generated symbolically, and `model` is evaluated on them. The
    mean squared difference between clean and adversarial inputs is
    reported alongside the accuracy.

    Parameters
    ----------
    images : dask array
        The set of images
    labels : dask array
        The ground truth labels
    batch_size : int
        Batch size; overridden by the batch size returned by `init_attack`
        when that value is truthy
    model : Keras Model
        The model being evaluated
    adv_model : Keras Model
        The model used to generate adversarial examples
    daug_params : dict
        Dictionary of data augmentation parameters; must contain the key
        'crop_size'
    attack_params : dict
        Dictionary of the attack parameters

    Returns
    -------
    results_dict : dict
        'mean_acc' and 'mean_mse', each a batch-size weighted sum divided
        by the number of images
    """
    session = K.get_session()

    # Set up the attack; it may dictate its own batch size.
    attack, cleverhans_kwargs, bs = init_attack(adv_model, attack_params,
                                                session)
    batch_size = bs if bs else batch_size

    n_images = images.shape[0]
    n_classes = labels.shape[1]
    n_batches = int(np.ceil(float(n_images) / batch_size))

    # Batch generator without shuffling and with one augmentation per image.
    image_gen = get_generator(images, **daug_params)
    batch_gen = batch_generator(image_gen,
                                images,
                                labels,
                                batch_size,
                                aug_per_im=1,
                                shuffle=False)

    # Placeholders follow the crop size when one is configured.
    crop_size = daug_params['crop_size']
    image_shape = crop_size if crop_size else images.shape[1:]
    x = tf.placeholder(K.floatx(), shape=(bs,) + tuple(image_shape))
    y = tf.placeholder(K.floatx(), shape=(bs,) + (n_classes,))

    # Symbolic adversarial examples and the model's predictions on them.
    x_adv = tf.stop_gradient(attack.generate(x, **cleverhans_kwargs))
    predictions_adv = model(x_adv)

    # Symbolic accuracy and mean squared perturbation.
    hits = tf.equal(tf.argmax(y, axis=-1),
                    tf.argmax(predictions_adv, axis=-1))
    acc_value = tf.reduce_mean(tf.to_float(hits))
    mse_value = tf.reduce_mean(tf.square(tf.subtract(x, x_adv)))

    accuracy = 0.0
    mse = 0.0

    with session.as_default():
        for _ in tqdm(range(n_batches)):
            # NOTE(review): batch_gen() is called on every iteration; if it
            # returns a fresh generator each time this always yields the
            # first batch -- confirm against batch_generator.
            batch = next(batch_gen())
            batch_images = batch[0]
            n_in_batch = batch_images.shape[0]

            # Multi-output label lists: only the first entry is scored.
            batch_labels = batch[1]
            if isinstance(batch_labels, list):
                batch_labels = batch_labels[0]

            batch_acc = acc_value.eval(feed_dict={
                x: batch_images,
                y: batch_labels,
                K.learning_phase(): 0
            })
            accuracy += (n_in_batch * batch_acc)

            batch_mse = mse_value.eval(feed_dict={
                x: batch_images,
                K.learning_phase(): 0
            })
            mse += (n_in_batch * batch_mse)

    accuracy /= n_images
    mse /= n_images

    return {'mean_acc': accuracy, 'mean_mse': mse}
input_ids = [[3, 6, 5, 8, 9]] input_mask = [[1, 1, 1, 1, 1]] token_type_ids = [[0, 0, 0, 0, 0]] #masked_lm_positions = [[2, 4]] #masked_lm_weights = [[1.0, 1.0]] #masked_lm_ids = [[15, 20]] masked_lm_ids = [[3, 6, 5, 8, 9]] input_ids[0].extend([0 for _ in range(64 - len(input_ids[0]))]) input_mask[0].extend([0 for _ in range(64 - len(input_mask[0]))]) token_type_ids[0].extend([0 for _ in range(64 - len(token_type_ids[0]))]) ''' print('Start unit testing : BERTWrapper') sess = K.get_session() init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) sess.run(init) test_data = [ ['Hello', 'World'], ['Hello', 'World'], ['Hello', 'World'], ['Hello', 'World'], ] input_vals = itokens.encode(test_data, max_length=16) output_vals = otokens.encode(test_data, max_length=16) print(input_vals) print(output_vals)
def compute_ig_within_GRU(seq2seq,
                          decoder_states_container,
                          decoder_inputs_container,
                          real_h,
                          target_gate="z",
                          target_class=[0],
                          reference=False,
                          k=32):
    """Score decoder state units by accumulated gradients through one GRU gate.

    A stand-alone model of the chosen gate is built, gradients of each
    target class output w.r.t. the previous hidden state are summed along
    the interpolation path returned by `linearly_interpolate`, scaled by the
    step size, and averaged over the target classes.

    Arguments:
        seq2seq: model wrapper exposing `decoder_model` (with layers
            "decoder_gru" and "decoder_emb") and `units`.
        decoder_states_container, decoder_inputs_container:
            For only one time step, the inputs of decoder GRU cell.
        real_h: reference output the "h" gate model is checked against.
        target_gate: One of "h", "r", "z".
        target_class: A list of int (output indices to differentiate).
        reference: unused.
        k: number of top-scoring units printed.

    Returns:
        Absolute value of the class-averaged result, averaged over axis 0.
    """
    assert target_gate in ("z", "r", "h"), "No this gate."
    print("\nCompute on %s gate." % target_gate)

    gru_weights = seq2seq.decoder_model.get_layer("decoder_gru").get_weights()
    emb_layer = seq2seq.decoder_model.get_layer('decoder_emb')
    emb_layer_model = Model(inputs=emb_layer.get_input_at(-1),
                            outputs=seq2seq.decoder_model.get_layer(
                                'decoder_emb').output)
    embedded = emb_layer_model.predict(decoder_inputs_container, verbose=0)
    inputs = np.squeeze(embedded, axis=1)
    states = np.copy(decoder_states_container)
    h_tm1, _h, z, r, hh, x_h, split_recurrent_h = gruig.get_GRU_components(
        inputs, states, gru_weights)
    # Kernel, recurrent kernel and bias stacked along axis 0.
    weight_array = np.concatenate([
        gru_weights[0], gru_weights[1],
        np.expand_dims(gru_weights[2], axis=0)
    ], axis=0)
    units = seq2seq.units

    # Each branch builds the gate model, evaluates it once and prepares the
    # constant part of the feed dict (everything except input[0]).
    if target_gate == "h":
        gate_model = gruig.build_GRU_with_h_gate_model(seq2seq)
        gate_model.get_layer("wx_h").set_weights(
            [gru_weights[0][:, units * 2:], gru_weights[2][units * 2:]])
        gate_model.get_layer("uh_h").set_weights(
            [gru_weights[1][:, units * 2:]])
        y = gate_model.predict([h_tm1, inputs, z, r], steps=1)
        feed_dict = {
            gate_model.input[1]: inputs,
            gate_model.input[2]: z,
            gate_model.input[3]: r,
        }
        real = real_h
    elif target_gate == "z":
        gate_model = gruig.build_GRU_with_z_gate_model(seq2seq, weight_array)
        y = gate_model.predict([h_tm1, inputs, r, hh], steps=1)
        feed_dict = {
            gate_model.input[1]: inputs,
            gate_model.input[2]: r,
            gate_model.input[3]: hh
        }
        real = y
    elif target_gate == "r":
        gate_model = gruig.build_GRU_with_r_gate_model(seq2seq, weight_array)
        y = gate_model.predict([h_tm1, inputs, z, x_h, split_recurrent_h],
                               steps=1)
        feed_dict = {
            gate_model.input[1]: inputs,
            gate_model.input[2]: z,
            gate_model.input[3]: x_h,
            gate_model.input[4]: split_recurrent_h
        }
        real = y

    # Sanity check (only non-trivial for the "h" gate, where real is real_h).
    delta = np.mean(np.abs(y - real))
    assert delta < 1e-6, "Wrong computation for error = %.8f" % delta
    print("delta =", delta)

    interpolate, num_steps, step_size = linearly_interpolate(
        decoder_states_container)
    state_shape = decoder_states_container.shape
    total_result = np.zeros(state_shape)
    sess = K.get_session()

    for class_ in target_class:
        print("Class index =", class_)
        gradient = gradients(gate_model.output[:, class_],
                             gate_model.input[0])
        accumulated = np.zeros(state_shape)
        for step in range(num_steps):
            feed_dict[gate_model.input[0]] = interpolate[step]
            accumulated += sess.run(gradient[0], feed_dict=feed_dict)
        total_result += np.multiply(accumulated, step_size)

    total_result /= float(len(target_class))
    score = np.abs(np.mean(total_result, axis=0))
    print("(total) selected =", list(np.argsort(score)[::-1][:k]))
    print(np.sort(score)[::-1][:4])
    return score
def predict(predict_var,
            x_unlabeled,
            inputs,
            y_true,
            batch_sizes,
            x_labeled=None,
            y_labeled=None):
    """Evaluates predict_var, batchwise, over all points in x_unlabeled and x_labeled.

    The unlabeled and labeled data are concatenated and fed slice by slice
    to the 'Unlabeled' placeholder. The 'Labeled' placeholder (if present)
    receives a random sample of the labeled points for every batch, or
    empty arrays when there are no labeled points.

    Args:
      predict_var: list of tensors to evaluate and return
      x_unlabeled: unlabeled input data
      inputs: dictionary containing input_types and input_placeholders as
        key, value pairs, respectively
      y_true: true labels tensorflow placeholder
      batch_sizes: dictionary containing input_types and batch_sizes as key,
        value pairs, respectively
      x_labeled: labeled input data
      y_labeled: labeled input labels

    Returns:
      The per-batch results concatenated along axis 0 when the first batch
      result has a non-empty shape; otherwise (scalar results) their sum.

    Raises:
      AssertionError: if both 'Labeled' and 'Unlabeled' batch sizes are
        given and they differ.
    """
    x_unlabeled, x_labeled, y_labeled = check_inputs(x_unlabeled, x_labeled,
                                                     y_labeled, y_true)

    # combined data
    x = np.concatenate((x_unlabeled, x_labeled), 0)
    # get shape of y_true (without the batch dimension)
    y_shape = y_true.get_shape()[1:K.ndim(y_true)].as_list()

    # calculate batches for predict loop
    unlabeled_batch_size = batch_sizes.get('Unlabeled', 0)
    labeled_batch_size = batch_sizes.get('Labeled', 0)
    if 'Labeled' in batch_sizes and 'Unlabeled' in batch_sizes:
        assert unlabeled_batch_size == labeled_batch_size
    batch_size = min(len(x), max(unlabeled_batch_size, labeled_batch_size))
    batches = make_batches(len(x), batch_size)

    y_preds = []
    # predict over all points
    for batch_start, batch_end in batches:
        feed_dict = {K.learning_phase(): 0}

        # feed corresponding input for each input_type
        for input_type, input_placeholder in inputs.items():
            if input_type == 'Unlabeled':
                feed_dict[input_placeholder] = x[batch_start:batch_end]
            elif input_type == 'Labeled':
                # len() rather than truthiness: `if x_labeled:` raises
                # ValueError for an array with more than one element.
                if len(x_labeled):
                    batch_ids = np.random.choice(
                        len(x_labeled),
                        size=min(batch_sizes[input_type], len(x_labeled)),
                        replace=False)
                    feed_dict[input_placeholder] = x_labeled[batch_ids]
                    feed_dict[y_true] = y_labeled[batch_ids]
                else:
                    # we have no labeled points, so feed an empty array
                    feed_dict[input_placeholder] = x[0:0]
                    feed_dict[y_true] = np.empty([0] + y_shape)

        # evaluate the batch
        y_pred_batch = np.asarray(K.get_session().run(predict_var,
                                                      feed_dict=feed_dict))
        y_preds.append(y_pred_batch)

    if y_preds[0].shape:
        return np.concatenate(y_preds)
    else:
        return np.sum(y_preds)
def train_step(return_vars,
               updates,
               x_unlabeled,
               inputs,
               y_true,
               batch_sizes,
               x_labeled=None,
               y_labeled=None,
               batches_per_epoch=100):
    """Performs one training step.

    Evaluates the tensors in return_vars and updates, then returns the
    values of the tensors in return_vars. Each of the batches_per_epoch
    iterations feeds a freshly drawn random batch (sampled without
    replacement) to every placeholder in `inputs`.

    Args:
      return_vars: list of tensors to evaluate and return
      updates: list of tensors to evaluate only
      x_unlabeled: unlabeled input data
      inputs: dictionary containing input_types and input_placeholders as
        key, value pairs, respectively
      y_true: true labels placeholder
      batch_sizes: dictionary containing input_types and batch_sizes as key,
        value pairs, respectively
      x_labeled: labeled input data
      y_labeled: labeled input labels
      batches_per_epoch: parameter updates per epoch*

    Returns:
      the evaluated result of all tensors in return_vars, summed across all
      batches of the epoch

    *note: the term epoch is used loosely here, it does not necessarily
           refer to one iteration over the entire dataset. instead, it is
           just batches_per_epoch parameter updates.
    """
    x_unlabeled, x_labeled, y_labeled = check_inputs(x_unlabeled, x_labeled,
                                                     y_labeled, y_true)

    # combine data (only used below as a source of correctly-shaped empties)
    x = np.concatenate((x_unlabeled, x_labeled), 0)

    # get shape of y_true (without the batch dimension)
    y_shape = y_true.get_shape()[1:K.ndim(y_true)].as_list()

    return_vars_ = np.zeros(shape=(len(return_vars)))
    # The fetch list is the same for every batch.
    all_vars = return_vars + updates

    # train batches_per_epoch batches
    for _ in range(batches_per_epoch):
        feed_dict = {K.learning_phase(): 1}

        # feed corresponding input for each input_type
        for input_type, input_placeholder in inputs.items():
            if input_type == 'Labeled':
                # len() rather than truthiness: a truth test on an array
                # with more than one element raises ValueError.
                if len(x_labeled):
                    batch_ids = np.random.choice(
                        len(x_labeled),
                        size=min(batch_sizes[input_type], len(x_labeled)),
                        replace=False)
                    feed_dict[input_placeholder] = x_labeled[batch_ids]
                    feed_dict[y_true] = y_labeled[batch_ids]
                else:
                    # we have no labeled points, so feed an empty array
                    feed_dict[input_placeholder] = x[0:0]
                    feed_dict[y_true] = np.empty([0] + y_shape)
            elif input_type == 'Unlabeled':
                if len(x_unlabeled):
                    # NOTE(review): unlike the labeled branch, the size is
                    # not capped at len(x_unlabeled); np.random.choice
                    # raises if the batch size exceeds the data size.
                    batch_ids = np.random.choice(len(x_unlabeled),
                                                 size=batch_sizes[input_type],
                                                 replace=False)
                    feed_dict[input_placeholder] = x_unlabeled[batch_ids]
                else:
                    # we have no unlabeled points, so feed an empty array
                    feed_dict[input_placeholder] = x[0:0]

        # Run everything, keep only the values of return_vars.
        return_vars_ += np.asarray(K.get_session().run(
            all_vars, feed_dict=feed_dict)[:len(return_vars)])

    return return_vars_
def load_preaggregated_data(self):
    """Load the pre-aggregated training and validation TFRecord datasets.

    Every file in `self.preaggregated_data_path` and in
    `self.preaggregated_validation_data_path` is read completely into
    memory through the Keras session; the session is cleared after each
    dataset has been read.

    Returns:
        Tuple `(X, Y, X_valid, Y_valid)`. `X` / `X_valid` are lists with
        one NumPy array per parsed feature; `Y` / `Y_valid` are the first
        array of the corresponding list.

    Raises:
        IndexError: if a directory yields no records at all.
    """

    def _read_all_records(data_dir):
        """Read every record under data_dir; return (features, first feature)."""
        tfrecord_file_list = [
            os.path.join(data_dir, k) for k in os.listdir(data_dir)
        ]
        print('Pre-aggregated file list = ' + str(tfrecord_file_list))

        reader = tf.TFRecordReader()
        # num_epochs=1: only generate all data once, then raise
        # OutOfRangeError.
        _, examples = reader.read(
            tf.train.string_input_producer(tfrecord_file_list, num_epochs=1))

        name_to_features = {
            "input_ids":
                tf.io.FixedLenFeature([self.max_seq_length], tf.int64),
            "input_mask":
                tf.io.FixedLenFeature([self.max_seq_length], tf.int64),
            "segment_ids":
                tf.io.FixedLenFeature([self.max_seq_length], tf.int64),
        }
        parsed_example = tf.parse_single_example(examples, name_to_features)
        parsed_example_values = list(parsed_example.values())

        # Reuse Keras Session
        sess = K.get_session()

        # Just read all data into array for now.
        # TODO: Implement generator to support very large dataset that does
        # not fit into RAM
        all_data = []
        sess.run(tf.initialize_local_variables())
        tf.train.start_queue_runners(sess=sess)
        try:
            while True:
                data = sess.run(parsed_example_values)
                # One growing column per feature, created on first use.
                for i, value in enumerate(data):
                    if len(all_data) <= i:
                        all_data.append([])
                    all_data[i].append(value)
        except tf.errors.OutOfRangeError:
            # Raised by the input queue once every file has been consumed.
            pass

        features = [np.array(a) for a in all_data]
        # Per the original note this is meant to be the 'input_ids' tensor;
        # that relies on the order of parsed_example.values() -- TODO confirm.
        labels = features[0]

        K.clear_session()  # sess object is not valid anymore after this
        return features, labels

    # Load pre-aggregated training dataset
    X, Y = _read_all_records(self.preaggregated_data_path)

    # Load pre-aggregated validation dataset
    X_valid, Y_valid = _read_all_records(
        self.preaggregated_validation_data_path)

    return (X, Y, X_valid, Y_valid)
def main(args):
    """Freeze the Keras model selected by FLAGS into a TensorFlow graph file.

    Resolves the output location, loads the input model, optionally renames
    the output nodes, optionally writes a checkpoint and a text graph
    definition, then converts variables to constants (after an optional
    quantization transform) and writes the binary graph.

    Args:
        args: unused; all configuration is read from FLAGS.
    """
    # A bare file name (parent '.') is anchored at the current directory.
    destination = FLAGS.output_model
    if str(Path(destination).parent) == '.':
        destination = str((Path.cwd() / destination))

    destination_path = Path(destination)
    out_dir = destination_path.parent
    frozen_name = destination_path.name
    stem = destination_path.stem
    pbtxt_name = stem + '.pbtxt'

    # Make sure the target directory exists.
    out_dir.mkdir(parents=True, exist_ok=True)

    data_format = 'channels_first' if FLAGS.channels_first else 'channels_last'
    K.set_image_data_format(data_format)

    custom_objects = get_custom_objects()
    model = load_input_model(FLAGS.input_model,
                             FLAGS.input_model_json,
                             FLAGS.input_model_yaml,
                             custom_objects=custom_objects)

    # TODO(amirabdi): Support networks with multiple inputs
    output_node_names = [tensor.op.name for tensor in model.outputs]
    if FLAGS.output_nodes_prefix:
        # Add identity ops so each output is reachable as <prefix><index>.
        prefixed_names = []
        for index in range(len(output_node_names)):
            alias = '{}{}'.format(FLAGS.output_nodes_prefix, index)
            prefixed_names.append(alias)
            tf.identity(model.outputs[index], name=alias)
        output_node_names = prefixed_names
    logging.info('Converted output node names are: %s',
                 str(output_node_names))

    sess = K.get_session()

    if FLAGS.output_meta_ckpt:
        tf.train.Saver().save(sess, str(out_dir / stem))

    if FLAGS.save_graph_def:
        tf.train.write_graph(sess.graph.as_graph_def(),
                             str(out_dir),
                             pbtxt_name,
                             as_text=True)
        logging.info('Saved the graph definition in ascii format at %s',
                     str(Path(out_dir) / pbtxt_name))

    if FLAGS.quantize:
        from tensorflow.tools.graph_transforms import TransformGraph
        transforms = ["quantize_weights", "quantize_nodes"]
        source_graph_def = TransformGraph(sess.graph.as_graph_def(), [],
                                          output_node_names, transforms)
    else:
        source_graph_def = sess.graph.as_graph_def()
    constant_graph = graph_util.convert_variables_to_constants(
        sess, source_graph_def, output_node_names)

    graph_io.write_graph(constant_graph,
                         str(out_dir),
                         frozen_name,
                         as_text=False)
    logging.info('Saved the freezed graph at %s',
                 str(Path(out_dir) / frozen_name))
for node in input_graph_def.node: node.device = "" frozen_graph = convert_variables_to_constants(session, input_graph_def, output_names, freeze_var_names) # f = tf.gfile.FastGFile(os.path.join('log', 'fronzen_model.pb'), "wb") # f.write(frozen_graph.SerializeToString()) return frozen_graph # src = "/root/optimization/ocrSecurity/ocr_onnx/outputs/detector/checkpoints/OCR_default/generator_scale_0.h5" # dst = "/root/optimization/ocrSecurity/ocr_onnx/outputs/detector/checkpoints/OCR_default/generator_scale_0.pb" src = "/root/optimization/ocrSecurity/ocr_onnx/outputs/detector/checkpoints/OCR_default/final_model_20200313_1.h5" dst = "/root/optimization/ocrSecurity/ocr_onnx/outputs/detector/checkpoints/OCR_default/final_model_20200313_1.pb" restored_model = tf.keras.models.load_model(src, compile=True) onnx_model = keras2onnx.convert_keras(restored_model, restored_model.name) keras2onnx.save_model( onnx_model, '/root/optimization/ocrSecurity/ocr_onnx/outputs/detector/checkpoints/OCR_default/final_model_20200313_1.onnx' ) frozen_graph = freeze_session( K.get_session(), output_names=[out.op.name for out in restored_model.outputs], clear_devices=True) tf.train.write_graph(frozen_graph, "/tmp", dst, as_text=False) print("finished")
def __init__(self,
             inputs,
             arch,
             cnc_reg,
             y_true,
             y_train_labeled_onehot,
             n_clusters,
             affinity,
             scale_nbr,
             n_nbrs,
             batch_sizes,
             result_path,
             dset,
             siamese_net=None,
             x_train=None,
             lr=0.01,
             temperature=1.0,
             bal_reg=0.0):
    """Build the CNC network, its loss and its training op.

    Args:
        inputs: dict of input placeholders; the keys 'Unlabeled' and
            'Labeled' are read here.
        arch: layer specification; all but the last entry are passed to
            `util.make_layer_list`, and a final layer of size `n_clusters`
            is appended.
        cnc_reg: regularization value passed to the layer list and used as
            'l2_reg' of the final layer.
        y_true: stored on the instance.
        y_train_labeled_onehot: stored on the instance.
        n_clusters: size of the final layer / number of clusters.
        affinity: one of 'siamese', 'knn' or 'full'.
        scale_nbr: forwarded to `util.get_scale` and `knn_affinity`.
        n_nbrs: forwarded to `knn_affinity`.
        batch_sizes: dict; the 'Unlabeled' entry is read here.
        result_path: stored on the instance.
        dset: sub-directory name used when loading the siamese weights.
        siamese_net: required when `affinity == 'siamese'`.
        x_train: data used to compute the affinity scale.
        lr: initial value of the learning-rate variable.
        temperature: initial value of the temperature variable.
        bal_reg: weight of the balance term in the loss.

    Raises:
        ValueError: if `affinity` is not one of the supported values.
    """
    # Fail early: an unsupported value used to surface later as an
    # unbound-local error on input_affinity / weight_mat.
    if affinity not in ('siamese', 'knn', 'full'):
        raise ValueError(
            "Unsupported affinity {!r}; expected 'siamese', 'knn' or "
            "'full'.".format(affinity))

    self.y_true = y_true
    self.y_train_labeled_onehot = y_train_labeled_onehot
    self.inputs = inputs
    self.batch_sizes = batch_sizes
    self.result_path = result_path
    self.lr = lr
    self.temperature = temperature

    # generate layers
    self.layers = util.make_layer_list(arch[:-1], 'cnc', cnc_reg)

    print('Runing with CNC loss')
    self.layers += [{
        'type': 'None',
        'size': n_clusters,
        'l2_reg': cnc_reg,
        'name': 'cnc_{}'.format(len(arch))
    }]

    # create CncNet
    self.outputs = stack_layers(self.inputs, self.layers)
    self.net = Model(inputs=self.inputs['Unlabeled'],
                     outputs=self.outputs['Unlabeled'])

    # DEFINE LOSS

    # generate affinity matrix W according to params
    if affinity == 'siamese':
        input_affinity = tf.concat(
            [siamese_net.outputs['A'], siamese_net.outputs['Labeled']],
            axis=0)
        x_affinity = siamese_net.predict(x_train, batch_sizes)
    elif affinity in ['knn', 'full']:
        input_affinity = tf.concat(
            [self.inputs['Unlabeled'], self.inputs['Labeled']], axis=0)
        x_affinity = x_train

    # calculate scale for affinity matrix
    scale = util.get_scale(x_affinity, self.batch_sizes['Unlabeled'],
                           scale_nbr)

    # create affinity matrix
    if affinity == 'full':
        weight_mat = affinities.full_affinity(input_affinity, scale=scale)
    elif affinity in ['knn', 'siamese']:
        weight_mat = affinities.knn_affinity(input_affinity,
                                             n_nbrs,
                                             scale=scale,
                                             scale_nbr=scale_nbr)

    # define loss
    self.tau = tf.Variable(self.temperature, name='temperature')
    self.outputs['Unlabeled'] = util.gumbel_softmax(
        self.outputs['Unlabeled'], self.tau)
    num_nodes = self.batch_sizes['Unlabeled']
    cluster_size = tf.reduce_sum(self.outputs['Unlabeled'], axis=0)
    # Balance target: every cluster holds an equal share of the batch.
    ground_truth = [num_nodes / float(n_clusters)] * n_clusters
    bal = tf.losses.mean_squared_error(ground_truth, cluster_size)

    degree = tf.expand_dims(tf.reduce_sum(weight_mat, axis=1), 0)
    vol = tf.matmul(degree, self.outputs['Unlabeled'], name='vol')
    normalized_prob = tf.divide(self.outputs['Unlabeled'],
                                vol[tf.newaxis, :],
                                name='normalized_prob')[0]
    gain = tf.matmul(normalized_prob,
                     tf.transpose(1 - self.outputs['Unlabeled']),
                     name='res2')

    self.loss = tf.reduce_sum(gain * weight_mat) + bal_reg * bal

    # create the train step update
    self.learning_rate = tf.Variable(self.lr, name='cnc_learning_rate')
    self.train_step = tf.train.RMSPropOptimizer(
        learning_rate=self.learning_rate).minimize(
            self.loss, var_list=self.net.trainable_weights)

    # initialize cnc_net variables
    K.get_session().run(tf.global_variables_initializer())
    K.get_session().run(tf.variables_initializer(self.net.trainable_weights))
    if affinity == 'siamese':
        # The global initializer above reset the siamese weights as well,
        # so they are reloaded from disk here.
        # NOTE(review): self.main_path is not assigned in this method;
        # presumably defined elsewhere on the class -- confirm.
        output_path = os.path.join(self.main_path, dset)
        load_model(siamese_net, output_path, '_siamese')
def __init__(self, model, model_name, preds, confidence, gt_coords):
    """Constructs a GuidedBackprop SaliencyMask.

    Args:
        model: Keras model; its ``input.name`` / ``output.name`` are used to
            look the tensors up in the re-imported graph. When
            ``model_name`` contains 'train' it is replaced by the model
            loaded from './tmp/gb_keras_train.h5'.
        model_name: string; the substrings 'train' and 'infer' select which
            output elements are differentiated.
        preds: used only in 'infer' mode; must offer ``tolist()``, and
            index 5 of each row is compared against ``confidence``.
        confidence: threshold a row's index-5 value must exceed for that
            row to be included.
        gt_coords: used only in 'train' mode; passed to ``grid_coords`` to
            obtain the grid cells whose outputs are differentiated.
    """
    # The class-level flag ensures the custom gradient is registered only
    # the first time an instance is built.
    if GuidedBackprop.GuidedReluRegistered is False:

        @tf.RegisterGradient("GuidedRelu")
        def _GuidedReluGrad(op, grad):
            # Pass gradient only where both the incoming gradient and the
            # forward activation are positive.
            gate_g = tf.cast(grad > 0, "float32")
            gate_y = tf.cast(op.outputs[0] > 0, "float32")
            return gate_y * gate_g * grad

        GuidedBackprop.GuidedReluRegistered = True

    # Create a dummy session to set the learning phase to 0 (test mode in
    # Keras) without interfering with the session of the original Keras
    # model. This works around tf.gradients raising errors on Keras models
    # that contain Dropout or BatchNormalization.
    # Basic idea: save the Keras model => create a new Keras model with the
    # learning phase set to 0 => save the TensorFlow graph => create a new
    # TensorFlow graph with ReLU replaced by GuidedReLU.

    # Set to test phase
    K.set_learning_phase(0)

    # Load training model
    if 'train' in model_name:
        print('Loading model ...')
        model = load_model('./tmp/gb_keras_train.h5')

    session = K.get_session()
    tf.compat.v1.train.export_meta_graph()
    saver = tf.compat.v1.train.Saver()
    saver.save(session, './tmp/guided_backprop_ckpt')

    self.guided_graph = tf.Graph()
    with self.guided_graph.as_default():
        # Use the compat.v1 entry point like the Saver / meta-graph calls
        # above, so the constructor does not depend on the TF1-only alias.
        self.guided_sess = tf.compat.v1.Session(graph=self.guided_graph)
        with self.guided_graph.gradient_override_map(
                {'LeakyRelu': 'GuidedRelu'}):
            # replace LeakyRelu with GuidedRelu
            saver = tf.compat.v1.train.import_meta_graph(
                './tmp/guided_backprop_ckpt.meta')
            saver.restore(self.guided_sess, './tmp/guided_backprop_ckpt')

            # Looked up once; every selected output is a slice of it.
            imported_output = self.guided_graph.get_tensor_by_name(
                model.output.name)

            output_list = []
            if 'train' in model_name:
                batch_idx = 0  # which image in the batch (assume batch size =1)
                anchor_box_idx = 2  # [20,20]
                prob_obj_idx = 4  # index for probability of a detection
                grid_hs, grid_ws = grid_coords(gt_coords)
                # original author's note on the output tensor: 64,64,3,6
                for h, w in zip(grid_hs, grid_ws):
                    output_list.append(imported_output[batch_idx, h, w,
                                                       anchor_box_idx,
                                                       prob_obj_idx])
            elif 'infer' in model_name:
                for idx, p in enumerate(preds.tolist()):
                    if p[5] > confidence:
                        output_list.append(imported_output[0, idx, 5])

            self.imported_y = output_list
            self.imported_x = self.guided_graph.get_tensor_by_name(
                model.input.name)

            # calculate gradient of class score with respect to input
            self.guided_grads_node = tf.gradients(self.imported_y,
                                                  self.imported_x)
import tensorflow.compat.v1 as tf
import numpy as np
import tensorflow.compat.v1.keras as keras
import tensorflow.compat.v1.keras.backend as K

tf.disable_v2_behavior()  # this code was written for TF1


def custom_softmax(x, axis=-1):
    """Return the softmax of ``x`` along ``axis``.

    The maximum is subtracted before exponentiating. Both reductions keep
    the reduced dimension so the results broadcast back against ``x`` for
    any batch size, not only for a single row.
    """
    m = tf.reduce_max(x, axis=axis, keepdims=True)
    e = tf.exp(x - m)
    return e / tf.reduce_sum(e, axis=axis, keepdims=True)


# Compare three softmax implementations on the same random input and print
# the summed absolute difference between each pair.
a = np.random.randn(1, 1000)

tfy = tf.nn.softmax(a)
ky = keras.activations.softmax(K.variable(a))
tfc = custom_softmax(a)

session = K.get_session()
tfy_ = session.run(tfy)
ky_ = session.run(ky)
tfc_ = session.run(tfc)

print("tf vs k", np.abs(tfy_ - ky_).sum())
print("tf vs custom", np.abs(tfy_ - tfc_).sum())
print("custom vs k", np.abs(tfc_ - ky_).sum())
def train(self, dataset):
    """Train the wrapped model on ``dataset``.

    The method loads the encoded data, truncates it to a whole number of
    batches, builds the Keras model on the available device(s), creates
    the optimizer and callbacks described by ``self.training_config``,
    optionally resumes from the last checkpoint, and runs ``model.fit``.

    Args:
        dataset: passed unchanged to
            ``self.trainable_model.load_encoded_data``.

    Returns:
        A tuple ``(single_gpu_model, x_valid_feed, y_valid_feed)``: the
        non-replicated model (the one whose weights are checkpointed) and
        the validation inputs/targets that were fed to ``fit``.
    """
    config = self.training_config
    batch_size = config['batch_size']

    def _row_count(data, is_multi):
        # Multi-part data is a list of arrays; the first one gives the size.
        return data[0].shape[0] if is_multi else data.shape[0]

    def _head(data, count, is_multi):
        # Keep only the first `count` rows (of every part when multi-part).
        if is_multi:
            return [part[0:count] for part in data]
        return data[0:count]

    # Multi-GPU runs deliberately share the single-GPU output paths, so the
    # suffix stays empty.
    dir_suffix = ''
    # NOTE(review): every visible GPU is used; `self.multi_gpu` does not
    # influence this count -- confirm whether it was meant to.
    gpu_count = len(self.get_available_gpus())
    print('Training on ' + str(gpu_count) + ' GPU(s)')

    # In train mode the data is loaded in a way that can use the caching
    # feature of the model wrapper.
    (X, Y, X_valid, Y_valid) = self.trainable_model.load_encoded_data(dataset)
    print(len(X[0]))
    print(len(Y))
    print(len(X_valid[0]))
    print(len(Y_valid))

    multi_input = self.input_transform.get_data_dimension() > 1
    multi_output = self.output_transform.get_data_dimension() > 1

    # Training set: round down to a whole number of batches.
    training_data_count = _row_count(X, multi_input)
    print('Training data count = ' + str(training_data_count))
    training_batch_count = int(training_data_count / batch_size)
    print('Batch count = ' + str(training_batch_count))
    training_data_count = int(training_batch_count * batch_size)
    print('Training data used = ' + str(training_data_count))

    epochs_count = int(config['epochs'])
    if 'final_epochs' in config:
        # Federated learning will have this value overridden
        epochs_count = int(config['final_epochs'])
    training_steps = int(training_batch_count) * epochs_count

    # Validation set: same rounding.
    validation_data_count = _row_count(X_valid, multi_input)
    print('Validation data count = ' + str(validation_data_count))
    validation_batch_count = int(validation_data_count / batch_size)
    print('Batch count = ' + str(validation_batch_count))
    validation_data_count = int(validation_batch_count * batch_size)
    print('Validation data used = ' + str(validation_data_count))

    X = _head(X, training_data_count, multi_input)
    X_valid = _head(X_valid, validation_data_count, multi_input)
    if multi_input:
        print('>>> X len = ' + str(len(X[0])))
        print('>>> X_valid len = ' + str(len(X_valid[0])))
    else:
        print('>>>> X len = ' + str(X.shape[0]))
        print('>>>> X_valid len = ' + str(X_valid.shape[0]))

    Y = _head(Y, training_data_count, multi_output)
    Y_valid = _head(Y_valid, validation_data_count, multi_output)
    if multi_output:
        # Report the Y lengths (these lines used to print the X lengths).
        print('>>> Y len = ' + str(len(Y[0])))
        print('>>> Y_valid len = ' + str(len(Y_valid[0])))
    else:
        print('>>>> Y len = ' + str(Y.shape[0]))
        print('>>>> Y_valid len = ' + str(Y_valid.shape[0]))

    # Build the model. With several GPUs, a template is created on the CPU
    # and wrapped for data-parallel training; `single_gpu_model` always
    # refers to the unwrapped model, which is the one that gets saved.
    if gpu_count > 1:
        with tf.device('/cpu'):
            [input_tensors,
             output_tensors] = self.trainable_model.get_forward_tensors()
            print("=== INPUT_TENSOR ===")
            print(input_tensors)
            print("=== OUTPUT_TENSOR ===")
            print(output_tensors)
            model = Model(input_tensors, output_tensors)
        print("=== CPU TEMPLATE MODEL ===")
        model.summary()
        single_gpu_model = model  # For saving weight
        model = multi_gpu_model(model, gpus=gpu_count)
        print("=== MULTI-GPU MODEL ===")
        model.summary()
    else:
        with tf.device('/gpu' if gpu_count == 1 else '/cpu'):
            [input_tensors,
             output_tensors] = self.trainable_model.get_forward_tensors()
            model = Model(input_tensors, output_tensors)
        single_gpu_model = model

    current_epoch_wrapper = LogCurrentEpochWrapper(config, dir_suffix)
    resume = ('resume_if_possible' in config
              and config['resume_if_possible'] == True)
    initial_epoch = 0
    if resume:
        initial_epoch = current_epoch_wrapper.get_current_epoch()

    # Home of output directory (support multi-OS): split on either kind of
    # path separator and re-join with the local one.
    # NOTE(review): a leading '/' yields an empty first component, so an
    # absolute POSIX path becomes relative here -- confirm this is intended.
    output_dir = os.path.join(*re.split('/|\\\\', config['output_dir']))
    os.makedirs(output_dir, exist_ok=True)

    optimizer = config['optimizer']
    if optimizer == 'adam':
        optimizer_params = config['optimizer_params']
        optimizer = Adam(optimizer_params[0],
                         optimizer_params[1],
                         optimizer_params[2],
                         epsilon=optimizer_params[3])
    elif optimizer == 'bert_adam':
        optimizer_params = config['optimizer_params']
        # Total step count is used as decay_steps (the learning rate
        # reaches 0 at the very end). It must be based on the *training*
        # batch count; the validation batch count was used here before.
        total_steps = training_batch_count * config['epochs']
        print('[INFO] Training with BERT Optimizer with decay_steps = ' +
              str(total_steps))
        from NLP_LIB.optimizer.bert_optimizer import BERTOptimizer
        optimizer = BERTOptimizer(
            decay_steps=total_steps,  # 100000,
            warmup_steps=optimizer_params[2],  # 10000,
            learning_rate=optimizer_params[0],  # 1e-4,
            weight_decay=optimizer_params[1],  # 0.01,
            weight_decay_pattern=[
                'embeddings', 'kernel', 'W1', 'W2', 'Wk', 'Wq', 'Wv', 'Wo'
            ],
        )
    elif optimizer == 'bert':
        optimizer_params = config['optimizer_params']
        from NLP_LIB.ext.bert.optimization import AdamWeightDecayOptimizer
        print('initial_epoch = ' + str(initial_epoch))
        print('training_batch_count = ' + str(training_batch_count))
        initial_step = initial_epoch * training_batch_count
        print('initial_step = ' + str(initial_step))
        optimizer = AdamWeightDecayOptimizer(
            # Start from current epoch to keep model running with correct LR
            initial_step=initial_step,
            learning_rate=optimizer_params[0],  # 0.0001,
            num_train_steps=training_steps,  # 100,
            warmup_steps=optimizer_params[4],  # 10,
            lr_decay_power=optimizer_params[5],
            weight_decay_rate=optimizer_params[6],
            beta_1=optimizer_params[1],  # 0.9,
            beta_2=optimizer_params[2],  # 0.999,
            epsilon=optimizer_params[3],  # 1e-6,
            exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"])
    # Any other value is handed to Keras unchanged.

    metric_funcs = self.trainable_model.get_metric_functions()
    model.compile(optimizer=optimizer,
                  loss=self.trainable_model.get_loss_function(),
                  metrics=metric_funcs)
    model.summary()

    # Keras is always fed lists of arrays.
    if multi_input:
        x_feed = X
        x_valid_feed = X_valid
    else:
        x_feed = [X]
        x_valid_feed = [X_valid]

    if multi_output:
        y_feed = Y
        y_valid_feed = Y_valid
    else:
        y_feed = [Y]
        y_valid_feed = [Y_valid]

    # If model is sequence model, we have to feed prev_output too.
    # TODO: Can we embed the flow to generate input list into the data
    # transformation class?
    if isinstance(self.trainable_model, SequenceModelWrapper):
        print('OH NOOO!!!')
        x_feed.append(Y)
        x_valid_feed.append(Y_valid)
        # The sequence model outputs logits while the labels are sparse
        # values. Keras needs both to have the same number of dimensions;
        # the conversion to dense happens in the model's custom loss, but
        # the extra dimension has to be prepared on the sparse labels here.
        y_feed = [np.expand_dims(Y, axis=2)]
        y_valid_feed = [np.expand_dims(Y_valid, axis=2)]

    class CustomTensorBoard(TensorBoard):
        """TensorBoard callback that also logs the optimizer's learning rate."""

        def __init__(self, log_dir, **kwargs):
            # add other arguments to __init__ if you need
            super().__init__(log_dir=log_dir, **kwargs)

        def on_epoch_end(self, epoch, logs=None):
            logs = logs or {}
            # If there is learning_rate_tensor in the optimizer, we want to
            # log it too.
            if hasattr(optimizer, 'learning_rate_tensor'):
                logs.update({
                    'learning_rate':
                        K.eval(optimizer.learning_rate_tensor)
                })
            super().on_epoch_end(epoch, logs)

    # Tensorboard log directory
    tboard_log_dir = os.path.join(output_dir, 'tboard_log' + dir_suffix)
    os.makedirs(tboard_log_dir, exist_ok=True)
    tboard_log_saver = CustomTensorBoard(tboard_log_dir,
                                         write_graph=False,
                                         write_images=False)

    # For saving weight history along with accuracy in each epoch
    # (may use a lot of disk).
    verbose_model_saver = None
    if config['save_weight_history']:
        verbose_log_dir = os.path.join(output_dir,
                                       'weight_history' + dir_suffix)
        os.makedirs(verbose_log_dir, exist_ok=True)
        verbose_weight_history_filepath = os.path.join(
            verbose_log_dir,
            'weights.{epoch:02d}-{' + config['watch_metric'] + ':.4f}.h5')

        # Optional: only save every N-th epoch.
        if 'save_weight_every' in config:
            save_weight_every = config['save_weight_every']
            print('[INFO] Save weight every = ' + str(save_weight_every))
            verbose_model_saver = RefModelCheckpoint(
                verbose_weight_history_filepath,
                single_gpu_model,
                save_best_only=False,
                save_weights_only=True,
                period=save_weight_every)
        else:
            verbose_model_saver = RefModelCheckpoint(
                verbose_weight_history_filepath,
                single_gpu_model,
                save_best_only=False,
                save_weights_only=True)

    model.summary()

    # Initialize all variables, including local variables created by
    # metrics calculations and optimizers. This must happen before any
    # weights are loaded below, otherwise they would be overwritten.
    sess = K.get_session()
    init = tf.group(tf.global_variables_initializer(),
                    tf.local_variables_initializer())
    sess.run(init)

    # Callback to model after finish variable initialization;
    # init_from_checkpoint is loaded here.
    self.trainable_model.on_after_init(single_gpu_model)

    # Checkpoint saving directory
    checkpoint_dir = os.path.join(output_dir, 'checkpoint')
    os.makedirs(checkpoint_dir, exist_ok=True)
    last_checkpoint_filepath = os.path.join(
        checkpoint_dir, 'last_weight' + dir_suffix + '.h5')

    # If resume training, load latest checkpoint
    if resume:
        print('Init model ' + str(self) + ' from epoch: ' +
              str(initial_epoch))
        if os.path.exists(last_checkpoint_filepath):
            print('Init model ' + str(self) + ' from checkpoint: ' +
                  last_checkpoint_filepath)
            single_gpu_model.load_weights(last_checkpoint_filepath)

    # Record the starting epoch in the training configuration.
    config['initial_epoch'] = initial_epoch

    checkpoint_filepath = os.path.join(checkpoint_dir,
                                       'best_weight' + dir_suffix + '.h5')
    model_saver = RefModelCheckpoint(checkpoint_filepath,
                                     single_gpu_model,
                                     save_best_only=True,
                                     save_weights_only=True)

    # Also always save latest model for continue training
    last_model_saver = RefModelCheckpoint(last_checkpoint_filepath,
                                          single_gpu_model,
                                          save_best_only=False,
                                          save_weights_only=True)

    # Construct all training callbacks
    training_callbacks = [model_saver, last_model_saver, tboard_log_saver]
    if verbose_model_saver is not None:
        training_callbacks.append(verbose_model_saver)
    if self.callback_list is not None:
        training_callbacks.extend(
            callback.get_keras_callback() for callback in self.callback_list)

    # Save current epoch
    training_callbacks.append(current_epoch_wrapper.get_keras_callback())

    print('Start training.')
    model.fit(x=x_feed,
              y=y_feed,
              batch_size=batch_size,
              epochs=config['epochs'],
              validation_data=(x_valid_feed, y_valid_feed),
              callbacks=training_callbacks,
              initial_epoch=initial_epoch)
    print('Finished training.')

    # Return trained model (single_gpu_model) and validation set as output.
    # They are used for further benchmarking like in federated training.
    return (single_gpu_model, x_valid_feed, y_valid_feed)