def load_model():
    """Select devices, start the transfer learner and create the prediction log.

    Takes no parameters; every attribute is read from / written to a ``self``
    name that must be resolvable from the enclosing scope.

    Side effects:
        * sets ``CUDA_DEVICE_ORDER`` / ``CUDA_VISIBLE_DEVICES`` when at least
          one GPU device is reported;
        * stores the session, end points, logits and input tensor returned by
          ``InitializeTransferLearner`` on ``self``;
        * (re)creates a tab-separated prediction log file with a header row;
        * sets ``self.initization`` to ``True`` once everything succeeded.
    """
    # --- device selection ---------------------------------------------------
    self.cpu_devices = get_cpu_devices()
    self.gpu_devices = get_gpu_devices()
    if len(self.gpu_devices) > 0:
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = str(self.gpu_to_use)
        chosen = (self.gpu_devices, DeviceCategory.GPU)
    else:
        chosen = (self.cpu_devices, DeviceCategory.CPU)
    # Only the device list is kept; the category is discarded.
    self.devices, _ = chosen

    # --- model initialisation -----------------------------------------------
    learner_parts = InitializeTransferLearner(
        self.model_dir, self.pretrained_model, self.classes)
    (self.sess_transfer_learner, self.end_points, self.logits,
     self.input_tensor) = learner_parts
    # NOTE(review): this local is never read inside the function.
    init_model = True

    # --- prediction log -----------------------------------------------------
    checkPathExists([self.infer_dir, self.log_dir])
    self.log_filename = self.log_dir + '/' + datetime.now().strftime(
        'prediction_logs_%H_%M_%d_%m_%Y.log')
    columns = ('Filename', 'Actual', 'Predicted', 'Score',
               'Noise_Suppression', 'Language_Model')
    with open(self.log_filename, 'w') as prediction_log:
        prediction_log.write('\t'.join(columns) + '\n')

    logging('Inference engine has been initiated on port: ' + str(self.port),
            self.logger, 'info')
    self.initization = True
def train_model(model_name='densenet121',
                opt='Adagrad',
                dataset='iris',
                writer=None):
    """Train a model, keep the best weights seen, and evaluate on the test set.

    The number of epochs is read from the module-level ``args.epochs``.

    Args:
        model_name: Name handed to ``load_model``. For ``"ownnet"`` the loaded
            object is indexed (``model[0]``) to reach the network whose
            parameters are optimised and whose state dict is saved.
        opt: Optimizer name handed to ``opt_selection``.
        dataset: Dataset name handed to ``load_data``; it also selects the
            loss (BCE-with-logits for ``mltoy`` / ``yeast14c`` /
            ``yeast14c_m``, cross-entropy otherwise, MSE for ``"ownnet"``).
        writer: Passed through to ``logging`` every epoch.

    Returns:
        The object returned by ``load_model`` with the best weights loaded
        into its network (if any epoch recorded weights).
    """
    train_loader, val_loader, test_loader = load_data(dataset)

    # Model selection
    model = load_model(model_name)
    is_ownnet = model_name == "ownnet"
    # For "ownnet" the trainable network is the first item of the result.
    network = model[0] if is_ownnet else model

    # Optimizer
    optimizer = opt_selection(network, opt)

    # Loss criterion ("ownnet" always overrides the dataset-based choice)
    if is_ownnet:
        criterion = torch.nn.MSELoss()
    elif dataset in ('mltoy', 'yeast14c', 'yeast14c_m'):
        criterion = nn.BCEWithLogitsLoss()
    else:
        criterion = nn.CrossEntropyLoss()

    best_train, best_val = 0.0, 0.0
    # Stays None when no epoch qualifies (e.g. args.epochs < 1), so the
    # final reload can be skipped instead of raising NameError.
    best_model_weights = None
    # ``g`` is the zero-based epoch counter expected by train_step.
    for g, epoch in enumerate(range(1, args.epochs + 1)):
        # Train and validate
        train_stats = train_step(model, criterion, optimizer, train_loader, g,
                                 epoch)
        valid_stats = valid_step(model, criterion, val_loader)

        # Logging
        logging(epoch, train_stats, valid_stats, writer)

        # Keep best model: better validation accuracy wins; ties are broken
        # by training accuracy.
        improved = valid_stats['accuracy'] > best_val
        tied = (valid_stats['accuracy'] == best_val
                and train_stats['accuracy'] >= best_train)
        if improved or tied:
            best_train = train_stats['accuracy']
            best_val = valid_stats['accuracy']
            best_model_weights = copy.deepcopy(network.state_dict())

    # Load best weights into the same object they were copied from. The
    # original called load_state_dict on ``model`` itself, which for
    # "ownnet" is the indexable wrapper rather than the network.
    if best_model_weights is not None:
        network.load_state_dict(best_model_weights)
    test_stats = valid_step(model, criterion, test_loader)
    print(
        '\nBests Model Accuracies: Train: {:4.2f} | Val: {:4.2f} | Test: {:4.2f}'
        .format(best_train, best_val, test_stats['accuracy']))

    return model
def get_action(self, state):
    """Evaluate ``self.predicted_action`` for ``state`` in ``self.sess``.

    Random exploration is disabled in this variant: the action always comes
    from the session run.

    Returns:
        Whatever ``self.sess.run`` returns, or ``None`` when the run raised
        (the error is reported through ``logging``).
    """
    try:
        run = self.sess.run
        action = run(self.predicted_action, {self.states: state})
    except Exception as e:
        logging("Get action failed - " + str(e), self.logger, 'error')
    else:
        return action
def get_action(self, state, init=False):
    """Pick the next action, either at random or from the policy session.

    A random draw below ``self.exploration`` (or ``init=True``) yields
    ``self.search_space_size`` distinct integers sampled from
    ``range(1, self.num_hidden)``, wrapped as an ``int32`` array of shape
    ``(1, 1, search_space_size)``. Otherwise ``self.predicted_action`` is
    evaluated in ``self.policy_session`` with ``state`` fed to
    ``self.states``.

    Returns:
        The chosen action, or ``None`` when anything raised (the error is
        reported through ``logging``).
    """
    try:
        # The random draw always happens first, even when ``init`` is set.
        explore = random.random() < self.exploration or init
        if not explore:
            return self.policy_session.run(self.predicted_action,
                                           {self.states: state})
        picks = random.sample(range(1, self.num_hidden),
                              self.search_space_size)
        return np.array([[picks]], dtype=np.int32)
    except Exception as e:
        logging("Get action failed - " + str(e), self.logger, 'error')
def discount_reward_computation(self):
    """Compute discounted rewards over ``self.reward_buffer``; return the last.

    The buffer is walked from the newest entry to the oldest. The running
    total is reset whenever a non-zero reward is met, then updated as
    ``total * self.discount_factor + reward``. Only the final element of the
    resulting array is returned.

    NOTE(review): because the walk starts at the last entry with a zero
    total, the returned value equals the last buffered reward (in the
    buffer's dtype) — confirm that this is what callers expect.

    Returns:
        The last discounted reward, or ``None`` when anything raised (for
        instance on an empty buffer); the error goes through ``logging``.
    """
    try:
        rewards = np.asarray(self.reward_buffer)
        discounted = np.zeros_like(rewards)
        carry = 0
        for idx in range(rewards.size - 1, -1, -1):
            if rewards[idx] != 0.0:
                carry = 0
            carry = carry * self.discount_factor + rewards[idx]
            discounted[idx] = carry
        return discounted[-1]
    except Exception as e:
        logging("Discount rewards failed - " + str(e), self.logger, 'error')
def policy_network(self, state, max_layers, logger=None):
    """Build the NAS-cell policy RNN and return its last output step.

    Args:
        state: Input tensor; a trailing axis is added with
            ``tf.expand_dims(state, -1)`` before it is fed to the RNN.
        max_layers: The cell size and the bias length are ``4 * max_layers``.
        logger: Forwarded to ``logging`` when construction fails.

    Returns:
        ``outputs[:, -1:, :]`` of the biased RNN outputs, or ``None`` when
        anything raised.
    """
    try:
        with tf.name_scope("policy_network"):
            cell = tf.contrib.rnn.NASCell(4 * max_layers)
            rnn_input = tf.expand_dims(state, -1)
            outputs, _final_state = tf.nn.dynamic_rnn(cell,
                                                      rnn_input,
                                                      dtype=tf.float32)
            bias = tf.Variable([0.05] * 4 * max_layers)
            outputs = tf.nn.bias_add(outputs, bias)
            # Debug print; it builds its own slice ops on purpose so the
            # graph contents stay exactly as before.
            print(
                "outputs: ", outputs, outputs[:, -1:, :],
                tf.slice(outputs, [0, 4 * max_layers - 1, 0],
                         [1, 1, 4 * max_layers]))
            # Last output of the RNN.
            last_step = outputs[:, -1:, :]
            return last_step
    except Exception as e:
        logging("Policy network failed - " + str(e), logger, 'error')
def train_model(model_name='densenet121',
                opt='Adagrad',
                dataset='iris',
                writer=None,
                label_col_name=''):
    """Train a classifier, keep its best-training-accuracy weights, and test it.

    No validation split is used here: the snapshot kept is the one from the
    latest epoch whose training accuracy is at least the best seen so far.
    The number of epochs is read from the module-level ``args.epochs``.

    Args:
        model_name: Name handed to ``load_model``.
        opt: Optimizer name handed to ``opt_selection``.
        dataset: Dataset name handed to ``load_data``.
        writer: Passed through to ``logging`` every epoch.
        label_col_name: Forwarded to ``load_data``.

    Returns:
        The trained model with the best recorded weights loaded (if any).
    """
    train_loader, test_loader, nb_classes = load_data(
        dataset, label_col_name=label_col_name)

    # Model selection
    model = load_model(model_name, nb_classes=nb_classes)

    # Optimizer and loss
    optimizer = opt_selection(model, opt)
    criterion = nn.CrossEntropyLoss()

    best_train = 0.0
    # Stays None when no epoch qualifies (args.epochs < 1, or the accuracy
    # never compares >= best_train, e.g. NaN); avoids a NameError below.
    best_model_weights = None
    for epoch in range(1, args.epochs + 1):
        train_stats = train_step(model, criterion, optimizer, train_loader)

        logging(epoch, train_stats, writer)

        # Keep best model (ties replace the earlier snapshot).
        if train_stats['accuracy'] >= best_train:
            best_train = train_stats['accuracy']
            best_model_weights = copy.deepcopy(model.state_dict())

    # Load best model and evaluate on test set
    if best_model_weights is not None:
        model.load_state_dict(best_model_weights)
    test_stats = valid_step(model, criterion, test_loader)

    print('\nBests Model Accuracies: Train: {:4.2f} | Test: {:4.2f}'.format(
        best_train, test_stats['accuracy']))

    return model
def train_step(self, steps_count):
    """Run one controller update on the most recent ``steps_count`` states.

    Steps performed:
        1. print every (gradient, variable) pair whose gradient is not None;
        2. scale the last ``steps_count`` buffered states by
           ``1 / self.division_rate``;
        3. wrap ``self.discount_reward_computation()`` in a one-element
           float32 array and feed both to the policy session;
        4. write the summary, flush it and save a checkpoint;
        5. multiply ``self.exploration`` by 0.99 on every 20th non-zero
           global step while it is above 0.5.

    Returns:
        ``(loss, log_probs)`` where ``log_probs`` is a list of ``'%.3f'``
        formatted strings taken from ``policy_outputs[0][0]``; ``None`` when
        anything raised (the error goes through ``logging``).
    """
    try:
        for pair in self.gradients:
            gradient, _variable = pair
            if gradient is not None:
                print(pair)

        recent_states = self.state_buffer[-steps_count:]
        states = np.array(recent_states) / self.division_rate
        reward = np.asarray([self.discount_reward_computation()
                             ]).astype('float32')

        fetches = [
            self.train_op, self.loss, self.summaries_op, self.policy_outputs,
            self.global_step
        ]
        feed = {self.states: states, self.discounted_rewards: reward}
        _, loss, summary, raw_outputs, global_step = self.policy_session.run(
            fetches, feed)
        log_probs = ['%.3f' % value for value in raw_outputs[0][0]]

        self.summary_writer.add_summary(summary, global_step)
        self.summary_writer.flush()
        self.saver.save(self.policy_session,
                        save_path=self.model_dir + 'controller/model.chkt',
                        global_step=self.global_step)

        # reduce exploration after many train steps
        on_decay_step = global_step != 0 and global_step % 20 == 0
        if on_decay_step and self.exploration > 0.5:
            self.exploration *= 0.99
        return loss, log_probs
    except Exception as e:
        logging("Train step failed - " + str(e), self.logger, 'error')
def main(args):
    """Train ``VGGFCN8s`` for 50 epochs and checkpoint improving epochs.

    Reads ``save_folder``, ``batch_size``, ``num_workers`` and ``device``
    from ``args``. After each epoch the train loss, validation loss and mIoU
    are written to the log; the weights are saved when mIoU exceeds 0.6 and
    the validation loss is below the lowest value saved so far (initially 2).
    """
    torch.manual_seed(422)
    log = logging(args.save_folder)

    loader_options = dict(batch_size=args.batch_size,
                          num_workers=args.num_workers)
    train_set = DataLoader(
        dataset=p2Dataset(root_dir='hw2_data/p2_data/train'),
        shuffle=True,
        **loader_options)
    valid_set = DataLoader(
        dataset=p2Dataset(root_dir='hw2_data/p2_data/validation'),
        shuffle=False,
        **loader_options)

    device = args.device
    model = VGGFCN8s(pretrained=True, n_classes=7).to(device)

    optimizer = optim.Adam(model.parameters(), lr=7e-5)
    criterion = CrossEntropyLoss(ignore_index=6)

    min_loss = 2
    for epoch in range(50):
        log.write('\nepoch: {}\n'.format(epoch))

        train_loss = train(train_set, model, optimizer, criterion, device)
        log.write('train: {}\n'.format(train_loss))

        valid_loss, iou = validation(valid_set, model, criterion, device)
        log.write('validation: {}, mIoU:{:.3f}\n'.format(valid_loss, iou))

        if not (iou > 0.6 and valid_loss < min_loss):
            continue
        checkpoint_path = '{}/epoch_{:02d}-{:.3f}.pth'.format(
            args.save_folder, epoch, iou)
        torch.save(model.state_dict(), checkpoint_path)
        min_loss = valid_loss
        log.write('Best epoch: {}\n'.format(epoch))
def dataPreprocessing(mode, data_dir, features_dir, batch_size, pretrained_model, logger=None):
    """Build (if needed) the ``<mode>.tfrecord`` file and return a batched input pipeline.

    Phase 1 (only when ``features_dir/<mode>.tfrecord`` is missing): list the
    label directories under ``data_dir/<mode>``, create or load
    ``features_dir/labels.txt`` (lines of ``id:name``), and serialise every
    image file into the TFRecord file.
    Phase 2 (always): reload ``labels.txt``, count the records, build a slim
    ``Dataset`` / ``DatasetDataProvider``, preprocess the image according to
    ``pretrained_model`` and batch it with ``tf.train.batch``.

    Args:
        mode: Sub-directory / record name; ``mode == 'train'`` switches the
            preprocessing to training mode.
        data_dir: Root directory holding ``<mode>/<label>/<image files>``.
        features_dir: Directory for ``<mode>.tfrecord`` and ``labels.txt``.
        batch_size: Batch size for ``tf.train.batch`` (also sizes the queues).
        pretrained_model: ``'inceptionV3'``, ``'inception_resnetV2'`` or
            ``'vgg_19'``.
        logger: Forwarded to ``logging``.

    Returns:
        ``(dataset, images, labels)``; ``None`` when anything raised (the
        error is only reported through ``logging``).
    """
    try:
        labels_dict = {}  # label dictionary
        if (not os.path.isfile(features_dir + "/" + mode + ".tfrecord")
            ):  # check whether feature file already exist
            logging("Preparing " + mode + "ing data...", logger, 'info')
            record_writer = tf.python_io.TFRecordWriter(
                path=features_dir + "/" + mode + ".tfrecord")
            labels = os.listdir(
                data_dir + "/" + mode)  # get all the images and labels in directory
            labels.sort(
            )  # sort the labels so that training and validation get them in the same order
            if (not os.path.isfile(features_dir + '/labels.txt')
                ):  # check whether feature file already exist
                # NOTE(review): set() does not keep the sorted order, so the
                # ids assigned here follow set iteration order — confirm
                # this is acceptable.
                for i, label in enumerate(list(
                        set(labels))):  # preparing labels
                    labels_dict[label] = i
            if (not os.path.isfile(features_dir + '/labels.txt')
                ):  # check whether labels file already exist
                with open(features_dir + '/labels.txt',
                          'w') as writelabelDict:  # write labels file
                    for k in sorted(labels_dict, key=labels_dict.get):
                        writelabelDict.write(
                            str(labels_dict[k]) + ':' + k + '\n')
            else:
                with open(features_dir + '/labels.txt',
                          'r') as label_file:  # load labels
                    for line in re.split('\r?\n', label_file.read()):
                        line = line.split(':')
                        # Keys are stored lower-cased, while the membership
                        # test uses the name as written in the file.
                        if len(line[0]) and len(line[1].strip(
                        )) and not line[1].strip() in labels_dict:
                            labels_dict[line[1].strip().lower()] = int(line[0])
            image_reader = ImageReader()
            with tf.Session('') as sess:
                for label in labels:
                    for filename in os.listdir(
                            os.path.join(data_dir, mode, label)):
                        # NOTE(review): this is a substring test against the
                        # literal '.DS_Store', so any filename that is a
                        # substring of it is skipped too — presumably
                        # ``!=`` was intended; confirm.
                        if (filename not in '.DS_Store'):
                            # NOTE(review): the file handle is not closed
                            # explicitly.
                            image_data = tf.gfile.FastGFile(
                                os.path.join(data_dir, mode, label, filename),
                                'rb').read()  # extract image features
                            height, width = image_reader.read_image_dims(
                                sess, image_data)
                            example = tf.train.Example(features=tf.train.Features(feature={  # tensorflow example
                                'image/encoded': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_data])),
                                'image/format': tf.train.Feature(bytes_list=tf.train.BytesList(value=[b'jpg'])),
                                'image/class/label': tf.train.Feature(int64_list=tf.train.Int64List(value=[labels_dict[label]])),
                                'image/height': tf.train.Feature(int64_list=tf.train.Int64List(value=[height])),
                                'image/width': tf.train.Feature(int64_list=tf.train.Int64List(value=[width])),
                            }))
                            record_writer.write(example.SerializeToString())
            record_writer.flush()
            record_writer.close()

        # load dataset
        # NOTE(review): entries added here map id -> name, whereas entries
        # left over from the branch above map name -> id; names already
        # present as keys are skipped by the membership test. The
        # orientation of ``labels_dict`` therefore depends on whether the
        # tfrecord was just written — verify what ``labels_to_name`` expects.
        with open(features_dir + '/labels.txt',
                  'r') as label_file:  # load labels
            for line in re.split('\r?\n', label_file.read()):
                line = line.split(':')
                if len(line[0]) and len(line[1].strip(
                )) and not line[1].strip() in labels_dict:
                    labels_dict[int(line[0])] = line[1].strip().lower()

        num_samples = 0  # Count the total number of examples in all of these shard
        for _ in tf.python_io.tf_record_iterator(features_dir + "/" + mode +
                                                 ".tfrecord"):
            num_samples += 1

        reader = tf.TFRecordReader  # create a reader, which must be a TFRecord reader in this case

        # create the keys_to_features dictionary for the decoder
        keys_to_features = {
            'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
            'image/format': tf.FixedLenFeature((), tf.string, default_value='jpg'),
            'image/class/label': tf.FixedLenFeature([], tf.int64, default_value=tf.zeros([], dtype=tf.int64)),
        }

        # create the items_to_handlers dictionary for the decoder.
        items_to_handlers = {
            'image': slim.tfexample_decoder.Image(),
            'label': slim.tfexample_decoder.Tensor('image/class/label'),
        }

        # start to create the decoder
        decoder = slim.tfexample_decoder.TFExampleDecoder(
            keys_to_features, items_to_handlers)

        # create the dataset
        dataset = slim.dataset.Dataset(data_sources=features_dir + "/" + mode +
                                       ".tfrecord",
                                       decoder=decoder,
                                       reader=reader,
                                       num_readers=4,
                                       num_samples=num_samples,
                                       num_classes=len(labels_dict),
                                       labels_to_name=labels_dict,
                                       items_to_descriptions={})

        # create the data_provider object
        data_provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            common_queue_capacity=24 + 3 * batch_size,
            common_queue_min=24)

        # obtain the raw image using the get method
        raw_image, label = data_provider.get(['image', 'label'])

        # perform the correct preprocessing for this image depending if it is training or evaluating
        # NOTE(review): ``image`` stays unbound when ``pretrained_model``
        # matches none of the three names; the resulting error is swallowed
        # by the handler below.
        if (pretrained_model == 'inceptionV3'):  # inception v3
            image_size = inception_v3.default_image_size
            image = inception_preprocessing.preprocess_image(
                raw_image,
                height=image_size,
                width=image_size,
                is_training=(mode == 'train'))
        elif (pretrained_model == 'inception_resnetV2'):  # inception_resnet v2
            image_size = inception_resnet_v2.default_image_size
            image = inception_preprocessing.preprocess_image(
                raw_image,
                height=image_size,
                width=image_size,
                is_training=(mode == 'train'))
        elif (pretrained_model == 'vgg_19'):  # vgg 19
            image_size = vgg_19.default_image_size
            image = vgg_preprocessing.preprocess_image(
                raw_image,
                output_height=image_size,
                output_width=image_size,
                is_training=(mode == 'train'))

        # as for the raw images reshape to batch it up
        # raw_image = tf.expand_dims(raw_image, 0)
        # raw_image = tf.image.resize_nearest_neighbor(raw_image, [image_size, image_size])
        # raw_image = tf.squeeze(raw_image)

        # batch up the image by enqueing the tensors internally in a FIFO queue and dequeueing many elements with tf.train.batch.
        images, labels = tf.train.batch([image, label],
                                        batch_size=batch_size,
                                        num_threads=4,
                                        capacity=4 * batch_size,
                                        allow_smaller_final_batch=True)
        logging(mode + "ing data loaded successfully", logger, 'info')
        return dataset, images, labels
    except Exception as e:
        # Errors are reported but not re-raised; the caller receives None.
        logging("Data preprocessing failed - " + str(e), logger, 'error')
def storeRollout(self, state, reward):
    """Record one rollout: the reward and the first element of ``state``.

    The reward is appended before the state, so a failure while reading
    ``state[0]`` leaves the reward already stored. Errors are reported
    through ``logging`` and not re-raised.
    """
    try:
        reward_sink = self.reward_buffer
        reward_sink.append(reward)
        state_sink = self.state_buffer
        state_sink.append(state[0])
    except Exception as e:
        logging("Store rollout failed - " + str(e), self.logger, 'error')
def create_variables(self):
    """Build the controller graph, its summaries, and restore a checkpoint.

    Creates on ``self``: the ``states`` and ``discounted_rewards``
    placeholders, the policy outputs / predicted action, the loss (softmax
    cross-entropy plus ``reg_param`` times the L2 regulariser), the
    reward-scaled gradients, ``train_op``, ``summaries_op``,
    ``summary_writer`` and ``saver``. All global variables are initialised in
    ``self.policy_session`` and, when a checkpoint is found in
    ``<model_dir>controller/``, it is restored on top.

    Any exception is reported through ``logging`` and not re-raised.
    """
    try:
        with tf.name_scope("model_inputs"):
            self.states = tf.placeholder(tf.float32,
                                         [None, self.search_space_size],
                                         name="states")

        with tf.name_scope("predict_actions"):
            # initialize policy network
            with tf.variable_scope("policy_network"):
                self.policy_outputs = self.policy_network(
                    self.states, self.search_space_size)

            self.action_scores = tf.identity(self.policy_outputs,
                                             name="action_scores")
            # Scores are scaled by division_rate and truncated to integers.
            self.predicted_action = tf.cast(tf.scalar_mul(
                self.division_rate, self.action_scores),
                                            tf.int32,
                                            name="predicted_action")

        # regularization loss
        policy_network_variables = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope="policy_network")

        # compute loss and gradients
        with tf.name_scope("compute_gradients"):
            self.discounted_rewards = tf.placeholder(
                tf.float32, (None, ), name="discounted_rewards"
            )  # gradients for selecting action from policy network

            # Second pass over the same variables (reuse=True).
            with tf.variable_scope("policy_network", reuse=True):
                self.log_probs = self.policy_network(
                    self.states, self.search_space_size)

            # compute policy loss and regularization loss
            self.cross_entropy_loss = tf.nn.softmax_cross_entropy_with_logits(
                logits=self.log_probs[:, -1, :], labels=self.states)
            self.pg_loss = tf.reduce_mean(self.cross_entropy_loss)
            self.reg_loss = tf.reduce_sum([
                tf.reduce_sum(tf.square(x)) for x in policy_network_variables
            ])  # regularization
            self.loss = self.pg_loss + self.reg_param * self.reg_loss

            # compute gradients
            self.gradients = self.optimizer.compute_gradients(self.loss)

            # compute policy gradients: scale each gradient by the reward
            for i, (grad, var) in enumerate(self.gradients):
                if grad is not None:
                    self.gradients[i] = (grad * self.discounted_rewards, var)

            # training update
            with tf.name_scope("train_policy_network"):
                self.train_op = self.optimizer.apply_gradients(
                    self.gradients, global_step=self.global_step
                )  # apply gradients to update policy network

        tf.summary.scalar('controller_discounted_reward',
                          tf.reduce_sum(self.discounted_rewards))
        tf.summary.scalar("controller_loss", self.loss)

        self.summaries_op = tf.summary.merge_all()
        filename = self.log_dir + '/controller/tb_logs/'
        self.summary_writer = tf.summary.FileWriter(
            filename, graph=self.policy_session.graph)

        self.policy_session.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver(max_to_keep=1)
        # latest_checkpoint expects the DIRECTORY that holds the
        # ``checkpoint`` state file. Checkpoints are written with the prefix
        # ``<model_dir>controller/model.chkt``, so the directory is
        # ``<model_dir>controller/``; passing the prefix itself (as before)
        # never finds anything and the restore was silently skipped.
        ckpt = tf.train.latest_checkpoint(self.model_dir + 'controller/')
        if ckpt:
            self.saver.restore(self.policy_session, ckpt)
            logging(
                self.model_dir + 'controller/model.chkt' +
                " model loaded successfully", self.logger, 'info')
    except Exception as e:
        logging("Create variables failed - " + str(e), self.logger, 'error')
def get_reward(self, action, prev_step, prev_accuracy):  # prev_epoch
    """Train the child network for one cycle and turn its accuracy into a reward.

    ``action`` supplies, by position, the depth, dropout rate, learning rate
    and momentum fed to the graph. Starting at ``prev_step`` the network is
    trained for up to ``self.num_child_steps_per_cycle`` steps (capped at
    ``self.num_steps``), then evaluated on the test batch. The reward is the
    validation accuracy, optionally clipped, with non-positive values
    replaced by 0.01. The moving accuracy, summary and checkpoint are updated
    before returning.

    Args:
        action: Indexable with at least four entries (see above).
        prev_step: First training step of this cycle.
        prev_accuracy: Only used in the progress print.

    Returns:
        ``(reward, validation_accuracy, self.loss_value, prev_step, step,
        probs, iterations, self.moving_accuracy, trainable_variables)``, or
        ``None`` when anything raised (reported through ``logging``) or the
        outer range is empty.
    """
    try:
        depth = action[0]
        dropout = action[1]
        learning_rate = action[2]
        momentum = action[3]
        # nesterov = action[4]

        summary, global_step = None, None
        iterations = 0
        # The body returns at the end of its first pass, so this loop runs
        # at most once.
        for step_ in range(prev_step, self.num_steps + 1):
            iterations += 1
            curr_itr = prev_step + self.num_child_steps_per_cycle
            # NOTE(review): if this range is empty, ``step`` (and possibly
            # ``self.loss_value``) is never assigned and the code below
            # raises; the error is then swallowed by the handler.
            for step in range(prev_step, (curr_itr if curr_itr < self.num_steps
                                          else self.num_steps)):
                # Slice out the mini-batch for this step.
                train_batch_xi = self.train_batch_x[step *
                                                    self.train_batch_size:
                                                    (step + 1) *
                                                    self.train_batch_size]
                train_batch_yi = self.train_batch_y[step *
                                                    self.train_batch_size:
                                                    (step + 1) *
                                                    self.train_batch_size]
                _, summary, global_step = self.sess.run(
                    [self.train_op, self.summaries_op, self.global_step],
                    feed_dict={
                        self.inputs: train_batch_xi,
                        self.labels: train_batch_yi,
                        self.depth: depth,
                        self.dropout_rate: dropout,
                        self.learning_rate: learning_rate,
                        self.momentum: momentum
                    })
                if step % 10 == 0:
                    # calculate batch loss and accuracy
                    self.loss_value, accuracy = self.sess.run(
                        [self.loss, self.accuracy],
                        feed_dict={
                            self.inputs: train_batch_xi,
                            self.labels: train_batch_yi,
                            self.depth: depth,
                            self.dropout_rate: dropout,
                            self.learning_rate: learning_rate,
                            self.momentum: momentum
                        })
                    print("Training: | Step: " + str(step) +
                          " | Training Loss: " +
                          "{:.3f}".format(self.loss_value) +
                          " | Training Accuracy: " +
                          "{:.3f}".format(accuracy))

            # validation on the full test batch
            validation_accuracy, probs, trainable_variables = self.sess.run(
                [
                    self.validation_accuracy, self.probs,
                    self.all_trainable_vars
                ],
                feed_dict={
                    self.inputs: self.test_batch_x,
                    self.labels: self.test_batch_y,
                    self.depth: depth,
                    self.dropout_rate: dropout,
                    self.learning_rate: learning_rate,
                    self.momentum: momentum
                })
            print("Validation | Step: " + str(step) +
                  " | Validation Accuracy: " +
                  "{:.3f}".format(validation_accuracy))

            # compute the reward
            reward = validation_accuracy  #- self.moving_accuracy)
            # NOTE(review): with clipping enabled the reward is capped at
            # 0.05 regardless of the accuracy — confirm this is intended now
            # that the reward is no longer a difference.
            if self.clip_rewards:
                reward = np.clip(reward, -0.05, 0.05)

            # update moving accuracy with bias correction for 1st update
            if self.beta > 0.0 and self.beta < 1.0:
                self.moving_accuracy = self.beta * self.moving_accuracy + (
                    1 - self.beta) * validation_accuracy
                self.moving_accuracy = self.moving_accuracy / (
                    1 - self.beta_bias)
                # The bias correction is applied once, then switched off.
                self.beta_bias = 0

            # Non-positive rewards are replaced by a small positive value.
            if reward <= 0.0:
                reward = 0.01

            print("Evaluation accuracy: " + str(validation_accuracy) +
                  " | moving accuracy: " +
                  str(round(self.moving_accuracy, 4)) +
                  " | previous accuracy: " + str(prev_accuracy))

            self.summary_writer.add_summary(summary, global_step)
            self.summary_writer.flush()
            # NOTE(review): tf.train.get_global_step() only finds a step
            # tensor registered in the graph's global-step collection;
            # verify it returns the intended variable here.
            self.saver.save(self.sess,
                            save_path=self.model_dir + 'network/model.chkt',
                            global_step=tf.train.get_global_step())
            return reward, validation_accuracy, self.loss_value, prev_step, step, probs, iterations, self.moving_accuracy, trainable_variables
    except Exception as e:
        # Errors are reported but not re-raised; the caller receives None.
        logging("Get reward failed - " + str(e), self.logger, 'error')
def train(mode, dataset, images, labels, batch_size, num_epochs, optimizer_fn, learning_rate, learning_rate_decay_factor, num_epochs_per_decay, dropout_keep_prob, pretrained_model, model_dir, pretrained_model_dir, layer_count, logger = None):
    """Fine-tune a pretrained slim model and record its training accuracy.

    Builds the selected architecture on ``images``, restores every variable
    except the scopes in ``architecture_layers[:layer_count(+1)]`` from the
    pretrained checkpoint, trains for ``num_epochs`` epochs inside a
    ``tf.train.Supervisor`` managed session, saves the model under
    ``model_dir + str(layer_count) + '/'`` and stores a tab-separated result
    row in the module-level ``train_accuracy`` mapping.

    Args:
        mode: ``'train'`` enables training behaviour in the model builders.
        dataset: Object exposing ``num_samples`` and ``num_classes``.
        images, labels: Batched input tensors.
        batch_size: Used to derive the number of steps per epoch.
        num_epochs: Number of epochs to run.
        optimizer_fn: Key into ``optimizer_functions``.
        learning_rate: Learning rate passed to the optimizer.
        learning_rate_decay_factor, num_epochs_per_decay: Not used by the
            active code (the decay schedule is commented out).
        dropout_keep_prob: Forwarded to the model builder.
        pretrained_model: ``'inceptionV3'``, ``'inception_resnetV2'`` or
            ``'vgg_19'``.
        model_dir: Root for the supervisor log dir and the saved model.
        pretrained_model_dir: Directory prefix of the pretrained checkpoint.
        layer_count: Number of leading entries of ``architecture_layers``
            excluded from restoration.
        logger: Forwarded to ``logging``.

    Returns:
        The ``train_accuracy`` mapping, or ``None`` when anything raised (the
        error is only reported through ``logging``).
    """
    try:
        # find the number steps to take before decaying the learning rate and batches per epoch
        num_batches_per_epoch = int(dataset.num_samples / batch_size) + 1
        num_steps_per_epoch = num_batches_per_epoch # one step is one batch processed
        # decay_steps = int(num_epochs_per_decay * num_steps_per_epoch)

        # initializing the model
        # NOTE(review): an unrecognised ``pretrained_model`` leaves
        # ``logits`` / ``model_file`` unbound; the resulting error is
        # swallowed by the handler at the bottom.
        if (pretrained_model == 'inceptionV3'): # inception V3
            model_file = 'inception_v3.ckpt'
            architecture_layers = inceptionV3_layers
            with slim.arg_scope(inception_v3_arg_scope()): # create the model inference
                logits, end_points = inception_v3(images, num_classes=dataset.num_classes, dropout_keep_prob=dropout_keep_prob, is_training=(mode == 'train'))
        elif (pretrained_model == 'inception_resnetV2'): # inception_resnetV2
            model_file = 'inception_resnet_v2_2016_08_30.ckpt'
            architecture_layers = inceptionResnetV2_layers
            with slim.arg_scope(inception_resnet_v2_arg_scope()): # create the model inference
                logits, end_points = inception_resnet_v2(images, num_classes=dataset.num_classes, dropout_keep_prob=dropout_keep_prob, is_training=(mode == 'train'))
        elif (pretrained_model == 'vgg_19'): # vgg 19
            model_file = 'vgg_19.ckpt'
            architecture_layers = vgg_19_layers
            with slim.arg_scope(vgg_arg_scope()): # create the model inference
                logits, end_points = vgg_19(images, num_classes=dataset.num_classes, dropout_keep_prob=dropout_keep_prob, is_training=(mode == 'train'))

        # The inception variants exclude one more leading scope than vgg.
        if (pretrained_model == 'inceptionV3' or pretrained_model == 'inception_resnetV2'): # inceptionV3 or inception_resnetV2
            logging("Transfer learning layers-" + str(layer_count) + ": " + str(architecture_layers[:(layer_count + 1)]), logger, 'info')
            # define the scopes that you want to exclude for restoration
            variables_to_restore = slim.get_variables_to_restore(exclude=architecture_layers[:(layer_count + 1)])
        elif (pretrained_model == 'vgg_19'): # vgg 19
            logging("Transfer learning layers-" + str(layer_count) + ": " + str(architecture_layers[:(layer_count)]), logger, 'info')
            # define the scopes that you want to exclude for restoration
            variables_to_restore = slim.get_variables_to_restore(exclude=architecture_layers[:(layer_count)])

        # perform one-hot-encoding of the labels
        one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes)

        # performs the equivalent to tf.nn.sparse_softmax_cross_entropy_with_logits but enhanced with checks
        loss = tf.losses.softmax_cross_entropy(onehot_labels = one_hot_labels, logits = logits)
        total_loss = tf.losses.get_total_loss() # obtain the regularization losses as well

        # NOTE(review): the beta1/beta2/epsilon keywords are passed to
        # whichever optimizer ``optimizer_fn`` selects — presumably only
        # Adam-style constructors accept them; verify the table's contents.
        optimizer = optimizer_functions[optimizer_fn](learning_rate = learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8) # define the optimizer that takes on the learning rate
        train_op = slim.learning.create_train_op(total_loss, optimizer) # create the train_op

        # the metrics that need to predict it isn't one_hot_encoded.
        if (pretrained_model == 'inceptionV3' or pretrained_model == 'inception_resnetV2'): # inceptionV3 or inception_resnetV2
            predictions = tf.argmax(end_points['Predictions'], 1)
            # probabilities = end_points['Predictions']
        elif (pretrained_model == 'vgg_19'): # vgg 19
            predictions = tf.cast(tf.to_int64(tf.argmax(logits, 1)), tf.float32)
        accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(predictions, labels)
        metrics_op = tf.group(accuracy_update) #, probabilities)

        # all the summaries need to monitor and group them into one summary op.
        # The scalar named 'losses/Total_Loss' records ``loss`` (the
        # cross-entropy term), not ``total_loss``.
        tf.summary.scalar('losses/Total_Loss', loss)
        tf.summary.scalar('train_accuracy', accuracy)
        # tf.summary.scalar('learning_rate', learning_rate)
        summary_op = tf.summary.merge_all()

        # create a saver function that actually restores the variables from a checkpoint file in a sess
        saver = tf.train.Saver(var_list=variables_to_restore, max_to_keep=1)
        def restore_fn(sess):
            return saver.restore(sess, pretrained_model_dir + model_file)

        # define supervisor for running a managed session.
        sv = tf.train.Supervisor(logdir=model_dir + str(layer_count) + "/", summary_op=None, init_fn=restore_fn)

        # run the managed session
        with sv.managed_session() as sess:
            # sess.run(init_from_final_layer) # initialize the last unfreez layer
            max_train_accuracy = 0.0
            for step in range(num_steps_per_epoch * num_epochs):
                loss_value, _, _ = sess.run([train_op, sv.global_step, metrics_op])
                if(step == 0):
                    logging('Step: ' + str(int(step / num_batches_per_epoch + 1)) + '/' + str(num_epochs) + ' | learning rate: ' + str(learning_rate), logger, 'info')
                # At every epoch boundary (except step 0) evaluate, log and
                # write the summaries.
                if step % num_steps_per_epoch == 0 and step != 0:
                    logits_value, loss_value, accuracy_value, summary_values = sess.run([logits, total_loss, accuracy, summary_op])
                    logging('Step: ' + str(int(step / num_batches_per_epoch + 1)) + '/' + str(num_epochs) + ' | loss: ' + str(loss_value) + ' | accuracy: ' + str(accuracy_value), logger, 'info') # + ' | learning rate: ' + str(learning_rate_value)
                    sv.summary_computed(sess, summary_values) # log the summaries
                    if(accuracy_value > max_train_accuracy):
                        max_train_accuracy = accuracy_value

            # log the final training loss and accuracy
            total_loss_value, total_accuracy_value = sess.run([total_loss, accuracy])
            # The "Final accuracy" reported here is the maximum epoch
            # accuracy seen, not ``total_accuracy_value``.
            logging('Training Final loss: ' + str(total_loss_value) + ' | Training Final accuracy: ' + str(max_train_accuracy), logger, 'info')

            # once all the training has been done, save the log files and checkpoint model
            logging('Saving model of layers-' + str(layer_count), logger, 'info')
            sv.saver.save(sess, model_dir + str(layer_count) + '/', global_step=sv.global_step)

        # Only the first result for a given layer count is recorded.
        if not str(layer_count) in train_accuracy:
            train_accuracy[str(layer_count)] = str(num_epochs) + '\t' + str(round(learning_rate, 8)) + '\t' + str(total_loss_value) + \
                '\t' + str(total_accuracy_value * 100) + '\t' + str(max_train_accuracy * 100)

        logging("Transfer learning training completed successfully", logger, 'info')
        return train_accuracy
    except Exception as e:
        # Errors are reported but not re-raised; the caller receives None.
        logging("Trasnfer learning training failed - " + str(e), logger, 'error')
def initialize_graph(self):
    """Reset the default graph and build the child network with its session.

    Creates on ``self``: the input / label / hyper-parameter placeholders,
    the ``ChildNetwork`` model, loss and accuracy tensors, the optimizer and
    ``train_op``, the unpacked dataset splits, ``num_steps``, a fresh
    ``tf.Session`` with initialised variables, the merged summaries, the
    summary writer and the saver. When a checkpoint is found in
    ``<model_dir>network/`` it is restored on top.

    ``self.input_dimensions`` is expected as an ``'HxWxC'``-style string; it
    is replaced by the list of its parts (and left alone on later calls).

    Any exception is reported through ``logging`` and not re-raised.
    """
    try:
        tf.reset_default_graph()
        print('Building graph...')

        # Split only while still a string, so the method can be called
        # again after it has already replaced the attribute with a list.
        if isinstance(self.input_dimensions, str):
            self.input_dimensions = self.input_dimensions.split('x')
        dims = [
            int(self.input_dimensions[0]),
            int(self.input_dimensions[1]),
            int(self.input_dimensions[2])
        ]

        if self.data_format == 'channels_last':
            input_shape = [None, dims[0], dims[1], dims[2]]
        else:
            # Otherwise the third dimension is moved in front of the
            # other two.
            input_shape = [None, dims[2], dims[0], dims[1]]
        self.inputs = tf.placeholder(tf.float32,
                                     shape=input_shape,
                                     name="inputs")

        self.labels = tf.placeholder(tf.int32,
                                     shape=[None, self.num_classes],
                                     name='label')
        self.depth = tf.placeholder(tf.int32, shape=[], name='depth')
        self.dropout_rate = tf.placeholder(tf.float32,
                                           shape=[],
                                           name="dropout")
        self.learning_rate = tf.placeholder(tf.float32,
                                            shape=[],
                                            name='learning_rate')
        self.momentum = tf.placeholder(tf.float32, shape=[], name='momentum')

        # child network
        self.model = ChildNetwork(self.inputs,
                                  self.depth,
                                  self.dropout_rate,
                                  self.num_classes,
                                  max_depth=self.max_depth,
                                  initial_filters=self.initial_filters,
                                  data_format=self.data_format)
        logits, self.probs = self.model.stochastic_depth_conv2d(mode='train')
        self.loss, self.accuracy = self.model.classification_loss(
            logits=logits, label=self.labels)
        # Built from the same logits and labels as the training accuracy;
        # only the data fed at run time differs.
        _, self.validation_accuracy = self.model.classification_loss(
            logits=logits, label=self.labels)

        self.global_step = tf.Variable(0, trainable=False)
        self.optimizer = self.activation_fn['4'](
            learning_rate=self.learning_rate,
            momentum=self.momentum,
            use_nesterov=True)
        self.var_list = tf.trainable_variables()
        self.train_op = self.optimizer.minimize(self.loss,
                                                global_step=self.global_step)

        # Total number of elements across all trainable variables.
        self.all_trainable_vars = tf.reduce_sum(
            [tf.reduce_prod(v.shape) for v in tf.trainable_variables()])

        (self.train_batch_x, self.train_batch_y, self.test_batch_x,
         self.test_batch_y) = self.dataset

        self.num_steps = int(
            math.ceil(len(self.train_batch_x) / self.train_batch_size))

        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())

        tf.summary.scalar("network_trainable_variables",
                          self.all_trainable_vars)
        tf.summary.scalar("network_depth", self.depth)
        tf.summary.scalar("network_loss", self.loss)
        tf.summary.scalar('network_training_accuracy', self.accuracy)
        tf.summary.scalar('network_validation_accuracy',
                          self.validation_accuracy)

        self.summaries_op = tf.summary.merge_all()
        filename = self.log_dir + '/network/tb_logs/'
        self.summary_writer = tf.summary.FileWriter(filename,
                                                    graph=self.sess.graph)

        self.saver = tf.train.Saver(max_to_keep=1)
        # latest_checkpoint expects the DIRECTORY that holds the
        # ``checkpoint`` state file. Checkpoints are written with the prefix
        # ``<model_dir>network/model.chkt``, so the directory is
        # ``<model_dir>network/``; passing the prefix itself (as before)
        # never finds anything and the restore was silently skipped.
        ckpt = tf.train.latest_checkpoint(self.model_dir + 'network/')
        if ckpt:
            self.saver.restore(self.sess, ckpt)
            logging(
                self.model_dir + 'network/model.chkt' +
                " model loaded successfully", self.logger, 'info')
    except Exception as e:
        # The message used to read "Get reward failed", which pointed at
        # the wrong method.
        logging("Initialize graph failed - " + str(e), self.logger, 'error')
_USAGE = ('voicenet.py -o <train|test|infer|analysis|serve> '
          '-m <inceptionV3,inception_resnetV2,vgg_19>')
_KNOWN_MODELS = ('inceptionV3', 'inception_resnetV2', 'vgg_19')


def _canonical_model_name(alias):
    """Map one (case-insensitive) model alias to its canonical architecture name."""
    lowered = alias.strip().lower()
    if not lowered:
        return ''
    # An alias matches when it is a substring of a canonical name, e.g.
    # 'inception' -> 'inceptionV3', 'resnet' -> 'inception_resnetV2', 'vgg' -> 'vgg_19'.
    for canonical in _KNOWN_MODELS:
        if lowered in canonical.lower():
            return canonical
    return alias.strip()


def _parse_cli_options(argv):
    """Return (mode, pretrained_models) parsed from argv, or None if usage was printed."""
    mode = ''
    pretrained_models = ''
    if len(argv) <= 1:
        return mode, pretrained_models
    try:
        opts, _ = getopt.getopt(argv[1:], "ho:m:", ["operation=", "model="])
    except getopt.GetoptError:
        print(_USAGE)
        return None
    for opt, arg in opts:
        opt = opt.lower()
        arg = arg.lower()
        if opt == '-h':
            print(_USAGE)
            return None
        elif opt in ("-o", "--operation"):
            mode = arg
        elif opt in ("-m", "--model"):
            pretrained_models = arg
    # Normalise every entry of a comma-separated model list.
    pretrained_models = ','.join(
        _canonical_model_name(name) for name in pretrained_models.split(','))
    return mode, pretrained_models


def _read_class_labels(labels_path):
    """Read class names from a labels file whose lines look like '<id>:<name>'."""
    with open(labels_path, 'r') as readfile:
        # Blank lines are skipped; a non-blank line without ':' still raises IndexError.
        return [line.split(':')[1].strip() for line in readfile if line.strip()]


def main(argv):
    """Command-line entry point: train, test, run inference with, or serve a model.

    Options ``-o/--operation`` and ``-m/--model`` override the ``mode`` and
    ``pretrained_model_dir`` entries of ``config/metavision.ini``; an option
    that is not given falls back to the configuration value.

    Args:
        argv: full argument vector, program name first (as in ``sys.argv``).

    Returns:
        None.

    Raises:
        Exception: any failure is printed and re-raised unchanged.
    """
    try:
        parsed = _parse_cli_options(argv)
        if parsed is None:  # usage/help was printed
            return
        mode, pretrained_models = parsed

        gConfig = getConfig('config/metavision.ini')  # get configuration
        site = gConfig['site']
        if not mode:
            mode = gConfig['mode']
        if not pretrained_models:
            pretrained_models = gConfig['pretrained_model_dir']
        datasets = gConfig['datasets']
        data_dirs = gConfig['data_dir']
        infer_dir = gConfig['infer_dir'] + "/" + datasets + "/"
        train_num_epochs = gConfig['train_num_epochs']
        test_num_epochs = gConfig['test_num_epochs']
        layer_start = gConfig['layer_start']
        infer_layer = gConfig['infer_layer']
        learning_rate = gConfig['learning_rate']
        learning_rate_decay_factor = gConfig['learning_rate_decay_factor']
        num_epochs_per_decay = gConfig['num_epochs_per_decay']
        train_batch_size = gConfig['train_batch_size']
        test_batch_size = gConfig['test_batch_size']
        optimizer = gConfig['optimizer']
        dropout_keep_prob = gConfig['dropout_keep_prob']
        extract_features_only = gConfig['extract_features_only']
        log_dir = gConfig['log_dir']
        port = gConfig['port']
        gpu_to_use = gConfig['gpu_to_use']
        certificate = gConfig['certificate']
        resource_dir = gConfig['resources']

        # create logger: everything goes to <log_dir>/log.txt and to the console
        _log.basicConfig(filename=log_dir + "/" + "log.txt",
                         level=_log.DEBUG,
                         format='%(asctime)s %(message)s',
                         datefmt='%m/%d/%Y %I:%M:%S %p')
        logger = _log.getLogger("VoiceNet")
        logger.setLevel(_log.DEBUG)
        console = _log.StreamHandler()
        console.setLevel(_log.DEBUG)
        console.setFormatter(_log.Formatter(
            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"))
        logger.addHandler(console)

        if 'train' in mode or 'test' in mode:
            # The CPU device list is not used here; the call is kept in case
            # device discovery has side effects.
            get_cpu_devices()
            gpu_devices = get_gpu_devices()
            if len(gpu_devices) > 1:
                os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
                os.environ["CUDA_VISIBLE_DEVICES"] = str(gConfig["gpu_to_use"])
            print("The available GPU devices: " + str(gpu_devices))

            for pretrained_model in pretrained_models.split(','):  # pre-trained architecture
                pretrained_model_dir = "pretrained_models/" + pretrained_model + "/"
                for dataset in datasets.split(','):  # datasets
                    data_dir = data_dirs + "/" + dataset + "/"
                    features_dir = gConfig['features_dir'] + "/" + pretrained_model + "/" + dataset + "/"
                    model_dir = gConfig['model_dir'] + "/" + pretrained_model + "/" + dataset + "/"
                    log_dir = gConfig['log_dir'] + "/" + pretrained_model + "/"
                    output_dir = gConfig['output_dir'] + "/" + pretrained_model + "/" + dataset + "/"
                    checkPathExists([model_dir, features_dir, log_dir, output_dir])

                    if pretrained_model == 'inceptionV3':
                        architecture_layers = inceptionV3_layers
                    elif pretrained_model == 'inception_resnetV2':
                        architecture_layers = inceptionResnetV2_layers
                    elif pretrained_model == 'vgg_19':
                        architecture_layers = vgg_19_layers
                    else:
                        # Fail loudly instead of reusing the previous model's layers
                        # (or hitting an unbound name).
                        raise ValueError("Unsupported pretrained model: " + pretrained_model)

                    logging(pretrained_model + " Transfer learning on " + dataset + " dataset",
                            logger, 'info')

                    if pretrained_model == 'vgg_19':
                        layer_count_len = len(architecture_layers)
                    else:
                        layer_count_len = len(architecture_layers) - 1
                    if extract_features_only:
                        layer_count_len = 2

                    for layer_count in range(layer_start, layer_count_len):
                        # ten extra epochs for every additional layer
                        train_num_epochs_ = train_num_epochs + (10 * (layer_count - 1))

                        if "train" in mode:  # training
                            with tf.Graph().as_default() as graph:
                                tf.logging.set_verbosity(tf.logging.ERROR)  # only log errors
                                train_dataset, train_images, train_labels = dataPreprocessing(
                                    'train', data_dir, features_dir, train_batch_size,
                                    pretrained_model, logger)
                                if not extract_features_only:
                                    train_accuracy = train(
                                        'train', train_dataset, train_images, train_labels,
                                        train_batch_size, train_num_epochs_, optimizer,
                                        learning_rate, learning_rate_decay_factor,
                                        num_epochs_per_decay, dropout_keep_prob,
                                        pretrained_model, model_dir, pretrained_model_dir,
                                        layer_count, logger)
                                graph.finalize()

                        if "test" in mode:  # testing
                            with tf.Graph().as_default() as graph:
                                tf.logging.set_verbosity(tf.logging.ERROR)  # only log errors
                                test_dataset, test_images, test_labels = dataPreprocessing(
                                    'test', data_dir, features_dir, test_batch_size,
                                    pretrained_model, logger)
                                if not extract_features_only:
                                    test_accuracy = test(
                                        'test', test_dataset, test_images, test_labels,
                                        test_batch_size, test_num_epochs, pretrained_model,
                                        model_dir, layer_count, logger)
                                graph.finalize()

                    if not extract_features_only:  # save the results file
                        logging(pretrained_model + " Writing results of " + dataset + " dataset",
                                logger, 'info')
                        # NOTE(review): results are written for layers 1..layer_count_len-1
                        # while the loop above starts at layer_start - confirm the accuracy
                        # dicts always hold every one of those keys.
                        if "train" in mode:
                            with open(output_dir + 'accuracy.txt', 'w') as writeresultsDict:
                                writeresultsDict.write(
                                    'Architecture\tDataset\tLayer\tEpochs\tLearning\tTrain_Loss\tTrain_Accuracy\tMax_Train_Accuracy\n')
                                for layer_count in range(1, layer_count_len):
                                    print(train_accuracy[str(layer_count)])
                                    writeresultsDict.write(
                                        pretrained_model + '\t' + dataset + '\t' + str(layer_count)
                                        + '\t' + train_accuracy[str(layer_count)] + '\n')
                        if "test" in mode:
                            with open(output_dir + 'test_accuracy.txt', 'w') as writeresultsDict:
                                writeresultsDict.write(
                                    'Architecture\tDataset\tLayer\tTest_Accuracy\n')
                                for layer_count in range(1, layer_count_len):
                                    writeresultsDict.write(
                                        pretrained_model + '\t' + dataset + '\t' + str(layer_count)
                                        + '\t' + str(test_accuracy[str(layer_count)]) + '\n')

        elif 'infer' in mode:
            output_dir = gConfig['output_dir'] + "/" + pretrained_models + "/" + datasets + "/"
            model_dir = gConfig['model_dir'] + "/" + pretrained_models + "/" + datasets + "/" + str(infer_layer) + "/"
            checkPathExists([output_dir, model_dir])

            # The context manager closes the results file even if inference fails.
            with open(gConfig['output_dir'] + '/inference.txt', 'w') as inferenceResults:
                inferenceResults.write(
                    'Architecture\tActual Class\tPredicted Class\tProbability\n')

                classes = _read_class_labels(resource_dir + '/' + datasets + '/labels.txt')
                sess_transfer_learner, end_points, logits, input_tensor = InitializeTransferLearner(
                    model_dir, pretrained_models, classes)
                try:
                    logging(datasets + " inference on " + pretrained_models + " network",
                            logger, 'info')

                    count = 0
                    probability = 0.0
                    accuracy_ = 0.0
                    for subdir, _, files in os.walk(infer_dir):
                        for filename in files:
                            if not filename.endswith(('.png', '.jpg')):
                                continue
                            image_path = subdir + "/" + filename
                            probabilities, actual_class, pred_class, accuracy, processed_image = ClassifyTransferLearner(
                                sess_transfer_learner, end_points, logits, input_tensor,
                                image_path, is_inference=True)
                            grad_cam(image_path, processed_image, input_tensor, end_points,
                                     sess_transfer_learner, classes.index(pred_class),
                                     num_classes=len(classes),
                                     output_path=subdir + "/" + filename.split('.')[0] + '_cam.jpg')
                            probability += probabilities
                            accuracy_ += accuracy
                            inferenceResults.write(
                                pretrained_models + '\t' + str(filename) + '\t' + actual_class
                                + '\t' + pred_class + '\t' + str(accuracy) + '\t'
                                + str(round(probabilities, 2)) + '\n')
                            count += 1

                    if count:
                        # averages over all classified images, as percentages
                        probability = (probability * 100) / count
                        accuracy_ = (accuracy_ * 100) / count
                        inferenceResults.write(
                            pretrained_models + '\t' + "Accuracy: " + str(accuracy_) + '\t'
                            + "Probability: " + str(round(probability, 2)) + '\n')
                        print(pretrained_models + ' network predictions on ' + datasets
                              + " - Probability: " + str(probability)
                              + " - Accuracy: " + str(accuracy_))
                finally:
                    # Always release the inference session.
                    CloseTransferLearner(sess_transfer_learner)

        elif mode == "serve":  # serve
            output_dir = gConfig['output_dir'] + "/" + pretrained_models + "/" + datasets + "/"
            model_dir = gConfig['model_dir'] + "/" + pretrained_models + "/" + datasets + "/" + str(infer_layer) + "/"
            checkPathExists([output_dir, model_dir])
            classes = _read_class_labels(resource_dir + '/' + datasets + '/labels.txt')
            model_server = Serving(site, port, model_dir, pretrained_models, infer_dir,
                                   output_dir, log_dir, gpu_to_use, classes,
                                   certificate=certificate, logger=logger)
            model_server.run()
    except Exception as ex:
        print("main function failed - " + str(ex))
        raise  # bare raise keeps the original traceback
def create_variables(self):
    """Build the controller graph: inputs, policy RNN, loss and training op.

    Creates the ``states`` and ``discounted_rewards`` placeholders, one
    embedding table per entry of ``self.state_space``, an LSTM-based chain of
    per-state classifiers, and the loss / gradient / ``train_op`` tensors.
    The tensors are stored on ``self``; nothing is returned.

    Any exception is caught and reported through ``logging`` with the
    'error' level rather than propagated.
    """
    try:
        with tf.name_scope("model_inputs"):
            # raw state representation
            self.states = tf.placeholder(tf.float32, [None, self.max_layers * 4], name="states")

        with tf.name_scope("predict_actions"):
            with tf.variable_scope("policy_network"):
                # The state input is the first input fed into the controller RNN;
                # the remaining inputs are fed to the RNN internally.
                state_input = tf.placeholder(dtype=tf.int32, shape=(1, None), name='state_input')

                nas_cell = tf.nn.rnn_cell.LSTMCell(35)
                cell_state = nas_cell.zero_state(batch_size=1, dtype=tf.float32)

                embedding_weights = []
                # For each possible state create an embedding; the weights are
                # reused across layers.
                with tf.variable_scope('embeddings', reuse=tf.AUTO_REUSE):
                    # enumerate() replaces a zip(items(), range(...)) that was
                    # unpacked into three names and therefore always raised.
                    for index, value in enumerate(self.state_space.values()):
                        size = len(value)
                        # size + 1 so that the 0th index is never updated and
                        # acts as the "default" value
                        weights = tf.get_variable('state_embeddings_%d' % index,
                                                  shape=[size + 1, self.embedding_dim],
                                                  initializer=tf.initializers.random_uniform(-1., 1.))
                        embedding_weights.append(weights)

                    # initially, the cell input is the first state's embedding
                    embeddings = tf.nn.embedding_lookup(embedding_weights[0], state_input)

                cell_input = embeddings

                num_states = len(self.state_space)
                for i in range(self.num_layers):
                    state_id = i % num_states
                    # NOTE(review): the source was whitespace-collapsed, so the nesting
                    # of this inner loop is reconstructed. As written, scope and layer
                    # names depend only on `i` and repeat for every state - confirm this
                    # is the intended structure.
                    for value in self.state_space.values():
                        size = len(value)

                        with tf.name_scope('controller_output_%d' % i):
                            # feed the ith layer input (i-1 layer output) to the RNN
                            outputs, final_state = tf.nn.dynamic_rnn(nas_cell,
                                                                     cell_input,
                                                                     initial_state=cell_state,
                                                                     dtype=tf.float32)

                            # add a new classifier for each layer's output
                            classifier = tf.layers.dense(outputs[:, -1, :], units=size,
                                                         name='classifier_%d' % (i), reuse=False)
                            predictions = tf.nn.softmax(classifier)

                            # The predicted class label becomes the next input.
                            cell_input = tf.argmax(predictions, axis=-1)
                            cell_input = tf.expand_dims(cell_input, -1, name='pred_output_%d' % (i))
                            cell_input = tf.cast(cell_input, tf.int32)
                            # 0 is avoided so that index 0 stays the "default" embedding
                            cell_input = tf.add(cell_input, 1)

                            # embedding lookup of this state using its state weights
                            cell_input = tf.nn.embedding_lookup(embedding_weights[state_id],
                                                                cell_input,
                                                                name='cell_output_%d' % (i))
                            cell_state = final_state

                        # store the tensors for later loss computation
                        self.cell_outputs.append(cell_input)
                        self.policy_classifiers.append(classifier)
                        self.policy_actions.append(predictions)

        # variables that take part in the regularization loss
        policy_network_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                                     scope="policy_network")

        # compute loss and gradients
        with tf.name_scope("compute_gradients"):
            self.discounted_rewards = tf.placeholder(tf.float32, (None,), name="discounted_rewards")

            # gradients for selecting an action from the policy network
            with tf.variable_scope("policy_network", reuse=True):
                self.logprobs = self.policy_network(self.states, self.max_layers)

            # policy loss and regularization loss
            self.cross_entropy_loss = tf.nn.softmax_cross_entropy_with_logits(
                logits=self.logprobs[:, -1, :], labels=self.states)
            self.pg_loss = tf.reduce_mean(self.cross_entropy_loss)
            self.reg_loss = tf.reduce_sum(
                [tf.reduce_sum(tf.square(x)) for x in policy_network_variables])
            self.loss = self.pg_loss + self.reg_param * self.reg_loss

            self.gradients = self.optimizer.compute_gradients(self.loss)

            # policy gradients: scale every available gradient by the discounted rewards
            for i, (grad, var) in enumerate(self.gradients):
                if grad is not None:
                    self.gradients[i] = (grad * self.discounted_rewards, var)

            # training update
            with tf.name_scope("train_policy_network"):
                # apply gradients to update the policy network
                self.train_op = self.optimizer.apply_gradients(self.gradients,
                                                               global_step=self.global_step)
    except Exception as e:
        logging("Create variables failed - " + str(e), self.logger, 'error')
def train(dataset, dataset_name, model_dir, num_episodes, max_depth, initial_filters, num_layers, num_hidden,
          initial_learning_rate, learning_rate_decay_factor, train_batch_size, test_batch_size, train_num_epochs,
          input_dimensions, num_classes, optimizer, num_steps_per_decay, num_child_steps_per_cycle, exploration,
          discount_factor, log_dir, output_dir, logger = None):
    """Run the meta-RL search loop and log one CSV row per episode.

    Each episode asks the ``Reinforce`` controller for an action, maps it into
    the search space (depth, dropout, learning rate, momentum), obtains a
    reward from ``NetManager.get_reward``, stores the rollout and performs one
    controller training step. Results are appended to
    ``<output_dir>/<dataset_name>_results.csv``.

    Args:
        dataset, dataset_name, input_dimensions, num_classes, train_batch_size,
        test_batch_size, train_num_epochs, max_depth, num_child_steps_per_cycle,
        initial_filters: forwarded to ``NetManager``.
        initial_learning_rate, num_hidden, num_layers, num_steps_per_decay,
        learning_rate_decay_factor, optimizer, exploration, discount_factor:
            forwarded to ``Reinforce``.
        model_dir, log_dir: forwarded to both ``Reinforce`` and ``NetManager``.
        num_episodes: number of search episodes to run.
        output_dir: directory that receives the results CSV.
        logger: logger handed to the helpers and used for error reporting.

    Returns:
        None. Any exception is caught and reported through ``logging``.
    """
    try:
        results_path = output_dir + '/' + dataset_name + '_results.csv'
        csv_fields = ['dataset', 'policy_episode', 'policy_layers', 'policy_neurons', 'policy_loss', 'policy_probs',
                      'lr optimizer', 'search_space', 'state', 'max depth of child network', 'child_steps_episod',
                      'steps', 'reward', 'network_accuracy', 'moving_accuracy', 'total_reward',
                      'gradients_entropty_avg', 'gradients_entropty_std', 'trainable_variables', 'time_taken']

        # (re)create the results file with just the header row
        with open(results_path, 'w') as target_csv_file:
            csv.DictWriter(target_csv_file, fieldnames=csv_fields).writeheader()

        search_space_fields = ['depth', 'dropout', 'learning_rate', 'momentum']
        max_depth_layers, _, min_depth_layers, _ = get_residual_layer(max_depth)
        search_space = {
            '0': list(np.arange(min_depth_layers, sum(max_depth_layers) + 1, 1)),                   # depth
            '1': list(np.arange(0.05, 0.35, 0.05)),                                                 # dropout
            '2': list(np.arange(0.01, 0.1, 0.01)) + list(np.arange(0.001, 0.01, 0.001)),            # learning_rate
            '3': list(np.arange(0.7, 0.99, 0.05)),                                                  # momentum
        }

        reinforce = Reinforce(model_dir, log_dir, initial_learning_rate = initial_learning_rate,
                              num_hidden = num_hidden, num_layers = num_layers, search_space = search_space,
                              num_steps_per_decay=num_steps_per_decay,
                              learning_rate_decay_factor = learning_rate_decay_factor, optimizer = optimizer,
                              exploration = exploration, discount_factor = discount_factor, logger = logger)
        net_manager = NetManager(search_space, input_dimensions=input_dimensions, num_classes=num_classes,
                                 dataset = dataset, dataset_name=dataset_name, log_dir=log_dir,
                                 train_batch_size = train_batch_size, test_batch_size = test_batch_size,
                                 train_num_epochs = train_num_epochs, max_depth=max_depth,
                                 num_child_steps_per_cycle=num_child_steps_per_cycle,
                                 initial_filters=initial_filters, model_dir = model_dir, logger = logger)

        prev_accuracy, prev_step, reward, network_loss = 0.0, 0, 0.01, 0.0
        ent_avg, ent_std, elapsed_time = 0.0, 0.01, 0.01
        total_rewards = 0.0
        # Defaults so logging still works when the very first sampled action is
        # rejected and no child network has been evaluated yet.
        steps, moving_accuracy, trainable_variables = 0, 0.0, 0
        action_in_search_space = []

        state = np.array([[0, 0, 0, 0]], dtype=np.int32)

        for i_episode in range(num_episodes):
            action = reinforce.get_action(state, init=(i_episode == 0))

            if all(ai >= 0 for ai in action[0][0]):
                start_time = time.time()
                action_in_search_space = [get_state_search(search_space, action[0][0], network_loss,
                                                           ent_avg, ent_std, total_rewards)]
                print("Actions: ", action_in_search_space[0][0])
                (reward, prev_accuracy, network_loss, prev_step, steps, probs, iterations,
                 moving_accuracy, trainable_variables) = net_manager.get_reward(
                    action_in_search_space[0][0], prev_step, prev_accuracy)
                entropies = sc.stats.entropy(probs)  # computed once, used for both statistics
                ent_avg = sum(entropies) / num_classes
                ent_std = np.std(entropies) * 10
                elapsed_time += (time.time() - start_time)
                print("Reward: " + str(reward) + " | Accuracy: " + str(prev_accuracy))
            else:
                # actions with a negative component are not evaluated
                reward = 0.01

            total_rewards += reward
            print('Total Reward: ' + str(round(total_rewards, 4)))

            state = action[0]
            reinforce.storeRollout(state, reward)
            loss, log_probs = reinforce.train_step(1)

            # logging
            log_str = "time taken: " + str(elapsed_time / 60) + " | problem: " + dataset_name + " | episode: " + str(i_episode) + " | steps: " + str(steps) + " | loss: " + str(round(loss, 3)) + \
                      " | log_probs: " + str(log_probs) + " | state: " + str(action[0]) + " | reward: " + str(round(reward, 2)) + " | network accuracy: " + str(round(prev_accuracy * 100, 2)) + "\n"
            print(log_str)

            # last evaluated point of the search space; empty until one exists
            searched_state = action_in_search_space[0][0] if action_in_search_space else ''

            # writing logs
            with open(results_path, 'a') as target_csv_file:
                csv.writer(target_csv_file).writerow([
                    dataset_name, str(i_episode), str(num_layers), str(num_hidden),
                    str(round(loss * 100, 3)), str(log_probs), str(optimizer), str(search_space_fields),
                    str(searched_state), str(max_depth), str(num_child_steps_per_cycle), str(steps),
                    str(round(reward, 2)), str(round(prev_accuracy * 100, 2)),
                    str(round(moving_accuracy * 100, 2)), str(round(total_rewards, 2)),
                    str(round(ent_avg * 100, 2)), str(round(ent_std * 100, 2)),
                    str(round(trainable_variables, 2)), str(round(elapsed_time, 2))])
    except Exception as e:
        logging("Meta-RL training failed - " + str(e), logger, 'error')
def test(mode, dataset, images, labels, batch_size, num_epochs, pretrained_model, model_dir, layer_count, logger = None):
    """Evaluate a transfer-learned model restored from its latest checkpoint.

    Builds the requested architecture on ``images``, restores the newest
    checkpoint found in ``model_dir + str(layer_count) + '/'`` and accumulates
    streaming accuracy against ``labels``.

    Args:
        mode: run mode; VGG-19 is built in training mode only when this is 'train'.
        dataset: object exposing ``num_samples`` and ``num_classes``.
        images: input tensor fed to the network.
        labels: label tensor compared with the predictions.
        batch_size: batch size used to derive the number of steps per epoch.
        num_epochs: number of passes over the dataset.
        pretrained_model: 'inceptionV3', 'inception_resnetV2' or 'vgg_19'.
        model_dir: directory holding one checkpoint sub-directory per layer count.
        layer_count: layer count whose checkpoint is evaluated.
        logger: logger passed to the ``logging`` helper.

    Returns:
        ``test_accuracy``, a mapping that is not defined in this function
        (presumably module-level - confirm). The entry for ``str(layer_count)``
        is set to the accuracy percentage as a string if not yet present, or
        to 0.0 when evaluation fails.
    """
    try:
        num_steps_per_epoch = int(dataset.num_samples / batch_size) + 1

        # initializing the model
        if pretrained_model == 'inceptionV3':
            architecture_layers = inceptionV3_layers
            with slim.arg_scope(inception_v3_arg_scope()):
                logits, end_points = inception_v3(images, num_classes=dataset.num_classes, is_training=False)
        elif pretrained_model == 'inception_resnetV2':
            architecture_layers = inceptionResnetV2_layers
            with slim.arg_scope(inception_resnet_v2_arg_scope()):
                logits, end_points = inception_resnet_v2(images, num_classes=dataset.num_classes, is_training=False)
        elif pretrained_model == 'vgg_19':
            architecture_layers = vgg_19_layers
            with slim.arg_scope(vgg_arg_scope()):
                # The network was previously always built with is_training=True;
                # evaluation should not run it in training mode.
                logits, end_points = vgg_19(images, num_classes=dataset.num_classes,
                                            is_training=(mode == 'train'))
        else:
            raise ValueError("Unsupported pretrained model: " + str(pretrained_model))

        if pretrained_model == 'vgg_19':
            evaluated_layers = architecture_layers[:layer_count]
            predictions = tf.cast(tf.to_int64(tf.argmax(logits, 1)), tf.float32)
        else:  # inceptionV3 or inception_resnetV2
            evaluated_layers = architecture_layers[:(layer_count + 1)]
            # class indices, not one-hot vectors
            predictions = tf.argmax(end_points['Predictions'], 1)
        logging("Transfer learning testing layers-" + str(layer_count) + ": " + str(evaluated_layers),
                logger, 'info')

        accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(predictions, labels)
        metrics_op = tf.group(accuracy_update)

        # make sure a global step exists before the supervisor is created
        get_or_create_global_step()

        # scalar quantity to monitor
        tf.summary.scalar('test_accuracy', accuracy)

        # restore every restorable variable from the checkpoint
        checkpoint_dir = model_dir + str(layer_count) + '/'
        saver = tf.train.Saver(var_list=slim.get_variables_to_restore())

        def restore_fn(sess):
            saver.restore(sess, tf.train.latest_checkpoint(checkpoint_dir))

        sv = tf.train.Supervisor(logdir=checkpoint_dir, summary_op=None, saver=None, init_fn=restore_fn)

        # run in one session
        with sv.managed_session() as sess:
            for step in range(num_steps_per_epoch * num_epochs):
                if step == 0:
                    # the first step only reads the metric, it does not update it
                    sess.run(accuracy)
                    continue
                _, accuracy_value = sess.run([metrics_op, accuracy])
                if step % 10 == 0:
                    logging('Step: ' + str(step) + ' | test accuracy: ' + str(accuracy_value), logger, 'info')

            # at the end of all the evaluation, show the final accuracy
            total_accuracy = sess.run(accuracy)
            logging('Testing Final accuracy: ' + str(total_accuracy), logger, 'info')
            if str(layer_count) not in test_accuracy:
                test_accuracy[str(layer_count)] = str(total_accuracy * 100)

        logging("Transfer learning testing completed successfully, accuracy: " + str(total_accuracy * 100),
                logger, 'info')
        return test_accuracy
    except Exception as e:
        logging("Transfer learning testing failed - " + str(e), logger, 'error')
        test_accuracy[str(layer_count)] = 0.0
        return test_accuracy