def evaluate(tmp, state_processor, policy, sess, num_of_proposal=15):
    """
    Evaluates a given network on an image.

    Args:
        tmp: A tuple of [image, target]
        state_processor: An instance of the StateProcessor class
        policy: An instance of the make_epsilon_greedy_policy function
        sess: Tensorflow session object
        num_of_proposal: Number of proposals that are used for evaluation

    Returns:
        Mean precision for the input image
    """

    # Unpacking the input image and its ground truth
    img = tmp[0]
    target = tmp[1]
    succ = 0

    # Creates an object localizer instance
    im2 = Image.frombytes("RGB", (img['image_width'], img['image_height']), img['image'])
    env = ObjLocaliser(np.array(im2), target)

    # Number of episodes the agent can interact with the input image
    for i_episode in range(num_of_proposal):

        # Reset the environment
        env.Reset(np.array(im2))
        state = env.wrapping()
        state = state_processor.process(sess, state)
        state = np.stack([state] * 4, axis=2)

        t = 0
        action = 0
        # The agent keeps searching until the termination action is chosen or the 50-action threshold is reached
        while (action != 10) and (t < 50):

            # Choosing an action epsilon-greedily (greedy with probability 0.8)
            action_probs, qs = policy(sess, state, 0.2)
            action = np.random.choice(np.arange(len(action_probs)), p=action_probs)

            # Taking the action in the environment and receiving the reward
            reward = env.takingActions(VALID_ACTIONS[action])

            # If an object is successfully localized, increase the counter
            if reward == 3:
                succ += 1

            # Observing the next state
            next_state = env.wrapping()
            next_state = state_processor.process(sess, next_state)
            next_state = np.append(state[:, :, 1:], np.expand_dims(next_state, 2), axis=2)

            state = next_state
            t += 1

    return float(succ) / num_of_proposal
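# Illustrative sketch (not part of the original pipeline): `evaluate` represents the
# agent's state as a rolling stack of the 4 most recent processed observations.
# The helper below reproduces that update with dummy 84x84 frames (the 84x84x4
# shape is the one the estimator expects, as used in visualize_layers); it assumes
# StateProcessor.process returns one 2-D frame per call.
def _frame_stack_update_sketch():
    import numpy as np
    frame = np.zeros((84, 84), dtype=np.float32)      # stands in for a processed observation
    state = np.stack([frame] * 4, axis=2)             # initial state: the same frame repeated 4 times
    new_frame = np.ones((84, 84), dtype=np.float32)   # stands in for the next processed observation
    # Drop the oldest channel and append the newest one, exactly as evaluate() does
    next_state = np.append(state[:, :, 1:], np.expand_dims(new_frame, 2), axis=2)
    assert next_state.shape == (84, 84, 4)
    return next_state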
def visualizing_seq_act(model_name, add, ground_truth, output_name):
    """
    Visualizes the sequence of actions taken on an image and saves it as an animation.

    Args:
        model_name: The model parameters that will be loaded for testing
        add: Path to an image
        ground_truth: Target coordinates
        output_name: Name of the output file
    """

    # Initiates the Tensorflow graph
    tf.reset_default_graph()

    # Where we save our checkpoints and graphs
    experiment_dir = os.path.abspath("../experiments/{}".format(model_name))

    # Create a global step variable
    global_step = tf.Variable(0, name='global_step', trainable=False)

    # Create the estimator
    q_estimator = Estimator(scope="q_estimator", summaries_dir=experiment_dir)

    # State processor
    state_processor = StateProcessor()

    # Creates an object localizer instance
    im2 = np.array(Image.open(add))
    env = ObjLocaliser(
        np.array(im2),
        {'xmin': [ground_truth[0]],
         'xmax': [ground_truth[2]],
         'ymin': [ground_truth[1]],
         'ymax': [ground_truth[3]]})

    with tf.Session() as sess:

        fig = plt.figure()
        ims = []

        # For 'system/' summaries, useful to check whether the current process looks healthy
        current_process = psutil.Process()

        # Create directories for checkpoints and summaries
        checkpoint_dir = os.path.join(experiment_dir, "bestModel")
        checkpoint_path = os.path.join(checkpoint_dir, "model")

        # Initiates a saver and loads a previously saved model if one is found
        saver = tf.train.Saver()
        latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
        if latest_checkpoint:
            print("Loading model checkpoint {}...\n".format(latest_checkpoint))
            saver.restore(sess, latest_checkpoint)

        # The policy we're following
        policy = make_epsilon_greedy_policy(q_estimator, len(VALID_ACTIONS))

        precisions = []
        final_reward = 0

        # Keeps going until the agent successfully localizes an object
        while final_reward != 3:

            plt.close()
            fig = plt.figure()
            ims = []

            # Reset the environment
            env.Reset(np.array(im2))
            state = env.wrapping()
            state = state_processor.process(sess, state)
            state = np.stack([state] * 4, axis=2)

            t = 0
            action = 0
            # The agent keeps searching until the termination action is chosen or the 50-action threshold is reached
            while (action != 10) and (t < 50):

                # Choosing an action epsilon-greedily (greedy with probability 0.8)
                action_probs, qs = policy(sess, state, 0.2)
                action = np.random.choice(np.arange(len(action_probs)), p=action_probs)

                # Takes the action and observes the new state and reward
                reward = env.takingActions(VALID_ACTIONS[action])
                next_state = env.wrapping()

                if reward == 3:
                    final_reward = 3

                imgplot = plt.imshow(env.my_draw())
                ims.append([imgplot])

                # Processing the new state
                next_state = state_processor.process(sess, next_state)
                next_state = np.append(state[:, :, 1:], np.expand_dims(next_state, 2), axis=2)

                state = next_state
                t += 1

            if final_reward != 3:
                print("Unsuccessful. Next try!")

        # Saving the animation
        ani = animation.ArtistAnimation(fig, ims, interval=1000, blit=True, repeat_delay=1000)
        path = '../experiments/{}/anim/'.format(model_name)
        if not os.path.exists(path):
            os.makedirs(path)
        ani.save('../experiments/{}/anim/{}.mp4'.format(model_name, output_name))
        print("The video is stored in ../experiments/{}/anim/{}.mp4".format(model_name, output_name))
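# Illustrative usage sketch for visualizing_seq_act (the model name, image path and
# box coordinates below are hypothetical placeholders, not values from the repo):
# ground_truth is passed as [xmin, ymin, xmax, ymax] pixel coordinates, and the
# resulting animation is written to ../experiments/<model_name>/anim/<output_name>.mp4.
#
#   visualizing_seq_act("my_model", "../data/example.jpg", [48, 32, 210, 180], "example_run")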
def DQL_testing(num_episodes, category, model_name):
    """
    Evaluates a model on the testing set.

    Args:
        num_episodes: Number of episodes that the agent can interact with an image
        category: The category that is going to be used for evaluation
        model_name: The model name that is going to be evaluated

    Returns:
        Mean precision for the given category over the test set
    """

    # Checks whether the records are available
    destination = "../data/"
    if not (os.path.isfile(destination + "test_input.npz") or os.path.isfile(destination + "test_target.npz")):
        print("Files are not ready!!!")
        return 0
    else:
        print("Records are already prepared!!!")

    # Initiates the Tensorflow graph
    tf.reset_default_graph()

    # Where we save our checkpoints and graphs
    experiment_dir = os.path.abspath("../experiments/{}".format(model_name))

    # Create a global step variable
    global_step = tf.Variable(0, name='global_step', trainable=False)

    # Create the estimator
    q_estimator = Estimator(scope="q_estimator", summaries_dir=experiment_dir)

    # State processor
    state_processor = StateProcessor()

    with tf.Session() as sess:

        # For 'system/' summaries, useful to check whether the current process looks healthy
        current_process = psutil.Process()

        # Create directories for checkpoints and summaries
        checkpoint_dir = os.path.join(experiment_dir, "bestModel")
        checkpoint_path = os.path.join(checkpoint_dir, "model")

        # Initiates a saver and loads a previously saved model if one is found
        saver = tf.train.Saver()
        latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
        if latest_checkpoint:
            print("Loading model checkpoint {}...\n".format(latest_checkpoint))
            saver.restore(sess, latest_checkpoint)

        # Get the current time step
        total_t = sess.run(tf.train.get_global_step())

        # The policy we're following
        policy = make_epsilon_greedy_policy(q_estimator, len(VALID_ACTIONS))

        precisions = []

        for indx, tmp in enumerate(extractData(category, "test", 32)):

            # Unpacking the image and its ground truth
            img = tmp[0]
            target = tmp[1]
            succ = 0

            # Creates an object localizer instance
            im2 = Image.frombytes("RGB", (img['image_width'], img['image_height']), img['image'])
            env = ObjLocaliser(np.array(im2), target)
            print("Image{} is being loaded: {}".format(indx, img['image_filename']))

            # Number of episodes the agent can interact with the input image
            for i_episode in range(num_episodes):

                # Reset the environment
                env.Reset(np.array(im2))
                state = env.wrapping()
                state = state_processor.process(sess, state)
                state = np.stack([state] * 4, axis=2)

                t = 0
                action = 0
                # The agent keeps searching until the termination action is chosen or the 50-action threshold is reached
                while (action != 10) and (t < 50):

                    # Choosing an action epsilon-greedily (greedy with probability 0.8)
                    action_probs, qs = policy(sess, state, 0.2)
                    action = np.random.choice(np.arange(len(action_probs)), p=action_probs)

                    # Takes the action and observes the new state and reward
                    reward = env.takingActions(VALID_ACTIONS[action])
                    next_state = env.wrapping()

                    if reward == 3:
                        succ += 1

                    # Processing the new state
                    next_state = state_processor.process(sess, next_state)
                    next_state = np.append(state[:, :, 1:], np.expand_dims(next_state, 2), axis=2)

                    state = next_state
                    t += 1

                print("number of actions for step {} is: {}".format(i_episode, t))

            precisions.append(float(succ) / num_episodes)
            print("image {} precision: {}".format(img['image_filename'], precisions[-1]))

        print("num of images: {}".format(len(precisions)))
        print("mean precision: {}".format(np.mean(precisions)))
        return np.mean(precisions)
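# Illustrative usage sketch for DQL_testing (the category and model name are
# hypothetical placeholders): it loads the checkpoint stored under
# ../experiments/<model_name>/bestModel, lets the agent run 15 episodes per
# test image, and returns the mean precision over the test set.
#
#   mean_precision = DQL_testing(15, "aeroplane", "my_model")
#   print("mean precision on the test set: {}".format(mean_precision))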
def DQL(num_episodes, replay_memory_size, replay_memory_init_size,
        update_target_estimator_every, discount_factor, epsilon_start,
        epsilon_end, epsilon_decay_steps, category, model_name):
    """
    Builds and trains the deep Q-network.

    Args:
        num_episodes: Number of episodes that the agent can interact with an image
        replay_memory_size: Number of the most recent experiences that are stored
        replay_memory_init_size: Number of experiences used to initialize the replay memory
        update_target_estimator_every: Number of steps after which the estimator parameters are copied to the target network
        discount_factor: Discount factor
        epsilon_start: Epsilon decay schedule start point
        epsilon_end: Epsilon decay schedule end point
        epsilon_decay_steps: Epsilon decay step rate
        category: The categories that are going to be used for training
        model_name: The trained model is saved under this name
    """

    # Downloads and prepares the dataset
    preparedataset()

    # Initiates the Tensorflow graph
    tf.reset_default_graph()

    # Where checkpoints and graphs are saved
    experiment_dir = os.path.abspath("../experiments/{}".format(model_name))

    # Create a global step variable
    global_step = tf.Variable(0, name='global_step', trainable=False)

    # Create estimators
    q_estimator = Estimator(scope="q_estimator", summaries_dir=experiment_dir)
    target_estimator = Estimator(scope="target_q")

    # State processor
    state_processor = StateProcessor()

    with tf.Session() as sess:

        # Initializes the network weights
        sess.run(tf.global_variables_initializer())

        Transition = namedtuple("Transition", ["state", "action", "reward", "next_state", "done"])

        # The replay memory
        replay_memory = []

        # Make the model copier object
        estimator_copy = ModelParametersCopier(q_estimator, target_estimator)

        # For 'system/' summaries, useful to check whether the current process looks healthy
        current_process = psutil.Process()

        # Create directories for checkpoints and summaries
        checkpoint_dir = os.path.join(experiment_dir, "checkpoints")
        checkpoint_path = os.path.join(checkpoint_dir, "model")
        report_path = os.path.join(experiment_dir, "report")
        best_model_dir = os.path.join(experiment_dir, "bestModel")
        best_model_path = os.path.join(best_model_dir, "model")
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        if not os.path.exists(report_path):
            os.makedirs(report_path)
        if not os.path.exists(best_model_dir):
            os.makedirs(best_model_dir)
        f = open(report_path + "/log.txt", 'w')

        # Initiates a saver and loads a previously saved model if one is found
        saver = tf.train.Saver()
        latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
        if latest_checkpoint:
            print("Loading model checkpoint {}...\n".format(latest_checkpoint))
            saver.restore(sess, latest_checkpoint)

        # Get the current time step
        total_t = sess.run(tf.train.get_global_step())

        # The epsilon decay schedule
        epsilons = np.linspace(epsilon_start, epsilon_end, epsilon_decay_steps)

        # The policy we're following
        policy = make_epsilon_greedy_policy(q_estimator, len(VALID_ACTIONS))

        # Initiates counters
        episode_counter = 0
        best_pre = 0
        eval_pre = []
        eval_set = []
        batch_size = 32
        done = False
        num_located = 0
        contImage = 0

        # Loads images from the dataset
        for indx, tmp in enumerate(extractData(category, "train", batch_size)):

            # Counter that tracks how many images have been read
            contImage += 1
            if contImage >= 453:
                break

            # Unpacking the image and its ground truth
            img = tmp[0]
            target = tmp[1]

            # The first 100 images are used for evaluation
            if len(eval_set) < 100:
                print("Populating evaluation set...")
                eval_set.append(tmp)
            else:
                # Every 20 images the neural network is evaluated
                if indx % 20 == 0:
                    print("Evaluation started ...")
                    print("Every 20 images the neural network is evaluated")
                    for tmp2 in eval_set:
                        eval_pre.append(evaluate(tmp2, state_processor, policy, sess))
                    if len(eval_pre) > 99:
                        # Saves the result of the evaluation with Tensorboard
                        print("Evaluation mean precision: {}".format(np.mean(eval_pre)))
                        f.write("Evaluation mean precision: {}\n".format(np.mean(eval_pre)))
                        episode_summary = tf.Summary()
                        episode_summary.value.add(simple_value=np.mean(eval_pre), tag="episode/eval_acc")
                        q_estimator.summary_writer.add_summary(episode_summary, episode_counter)
                        q_estimator.summary_writer.flush()

                        # If the achieved result is better than the previous results, the current state of the model is saved
                        if np.mean(eval_pre) > best_pre:
                            print("Best model changed with mean precision: {}".format(np.mean(eval_pre)))
                            f.write("Best model changed with mean precision: {}\n".format(np.mean(eval_pre)))
                            best_pre = np.mean(eval_pre)
                            saver.save(tf.get_default_session(), best_model_path)
                    eval_pre = []

                # Creates an object localizer instance
                im2 = Image.frombytes("RGB", (img['image_width'], img['image_height']), img['image'])
                env = ObjLocaliser(np.array(im2), target)
                print("Image{} is being loaded: {}".format(indx, img['image_filename']))
                f.write("Image{} is being loaded: {}".format(indx, img['image_filename']))

                # Populates the replay memory with initial experiences
                if len(replay_memory) < replay_memory_init_size:
                    print("Populating replay memory...\n")

                    # Reads and processes the current state
                    env.Reset(np.array(im2))
                    state = env.wrapping()
                    state = state_processor.process(sess, state)
                    state = np.stack([state] * 4, axis=2)

                    # Populating the replay memory up to the minimum threshold
                    for i in range(replay_memory_init_size):

                        # Epsilon for this time step
                        action_probs, _ = policy(sess, state, epsilons[min(total_t, epsilon_decay_steps - 1)])
                        action = np.random.choice(np.arange(len(action_probs)), p=action_probs)

                        # Takes the action and observes the new state and reward
                        reward = env.takingActions(VALID_ACTIONS[action])
                        next_state = env.wrapping()

                        # Checks whether the termination action was taken
                        if action == 10:
                            done = True
                        else:
                            done = False

                        # Skip transitions where processing fails (see the note in the training loop below)
                        try:
                            # Processing the new state
                            next_state = state_processor.process(sess, next_state)
                            next_state = np.append(state[:, :, 1:], np.expand_dims(next_state, 2), axis=2)
                            replay_memory.append(Transition(state, action, reward, next_state, done))
                            state = next_state

                            if done:
                                env.Reset(np.array(im2))
                                state = env.wrapping()
                                state = state_processor.process(sess, state)
                                state = np.stack([state] * 4, axis=2)
                            else:
                                state = next_state
                        except:
                            pass

                # Number of episodes the agent can interact with the input image
                for i_episode in range(num_episodes):

                    # Save the current checkpoint
                    saver.save(tf.get_default_session(), checkpoint_path)

                    # Reset the environment
                    env.Reset(np.array(im2))
                    state = env.wrapping()
                    state = state_processor.process(sess, state)
                    state = np.stack([state] * 4, axis=2)

                    loss = None
                    t = 0
                    action = 0
                    e = 0
                    r = 0

                    # The agent keeps searching until the termination action is chosen or the 50-action threshold is reached
                    while (action != 10) and (t < 50):

                        # Epsilon for this time step
                        epsilon = epsilons[min(total_t, epsilon_decay_steps - 1)]

                        # Maybe update the target estimator
                        if total_t % update_target_estimator_every == 0:
                            estimator_copy.make(sess)
                            print("\nCopied model parameters to target network.")

                        # Take a step
                        action_probs, qs = policy(sess, state, epsilon)
                        action = np.random.choice(np.arange(len(action_probs)), p=action_probs)

                        # Takes the action and observes the new state and its reward
                        reward = env.takingActions(VALID_ACTIONS[action])
                        next_state = env.wrapping()

                        if action == 10:
                            done = True
                        else:
                            done = False

                        # The original code did not have this try/except; it was added because of an error when the image shape was 0
                        try:
                            # Processing the new state
                            next_state = state_processor.process(sess, next_state)
                            next_state = np.append(state[:, :, 1:], np.expand_dims(next_state, 2), axis=2)

                            # If the replay memory is full, pop the first element
                            if len(replay_memory) == replay_memory_size:
                                replay_memory.pop(0)

                            # Save the transition to the replay memory
                            replay_memory.append(Transition(state, action, reward, next_state, done))

                            # Sample a minibatch from the replay memory
                            samples = random.sample(replay_memory, batch_size)
                            states_batch, action_batch, reward_batch, next_states_batch, done_batch = map(np.array, zip(*samples))

                            # Calculate q values and targets
                            q_values_next = target_estimator.predict(sess, next_states_batch)
                            targets_batch = reward_batch + np.invert(done_batch).astype(np.float32) \
                                * discount_factor * np.amax(q_values_next, axis=1)

                            # Perform the gradient descent update
                            states_batch = np.array(states_batch)
                            loss = q_estimator.update(sess, states_batch, action_batch, targets_batch)

                            print("Step {} ({}) @ Episode {}/{}, action {}, reward {}, loss: {}".format(
                                t, total_t, i_episode + 1, num_episodes, action, reward, loss))
                            f.write("Step {} ({}) @ Episode {}/{}, action {}, reward {}, loss: {}\n".format(
                                t, total_t, i_episode + 1, num_episodes, action, reward, loss))

                            # Counting the number of correctly localized objects
                            if reward == 3:
                                num_located += 1

                            state = next_state
                            t += 1
                            total_t += 1
                            e = e + loss
                            r = r + reward
                        except:
                            pass

                    episode_counter += 1

                    # Add summaries to Tensorboard
                    episode_summary = tf.Summary()
                    episode_summary.value.add(simple_value=epsilon, tag="episode/epsilon")
                    episode_summary.value.add(simple_value=r, tag="episode/reward")
                    episode_summary.value.add(simple_value=t, tag="episode/length")
                    episode_summary.value.add(simple_value=current_process.cpu_percent(), tag="system/cpu_usage_percent")
                    episode_summary.value.add(simple_value=current_process.memory_percent(), tag="system/v_memeory_usage_percent")
                    q_estimator.summary_writer.add_summary(episode_summary, episode_counter)
                    q_estimator.summary_writer.flush()

                    print("Episode Reward: {} Episode Length: {}".format(r, t))
                    f.write("Episode Reward: {} Episode Length: {}".format(r, t))

        print("Total number of images trained on: {}".format(contImage))
        f.write("Total number of images trained on: {}".format(contImage))
        print("Image category: {}".format(category))
        f.write("Image category: {}".format(category))
        f.close()
        print("number of correctly located objects: {}".format(num_located))
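# Illustrative sketch (toy data, not taken from the training loop above): the target
# computation inside DQL is the standard one-step Q-learning target
#   target = r + (1 - done) * discount_factor * max_a' Q_target(s', a'),
# where np.invert on the boolean done flags zeroes out the bootstrap term for
# terminal transitions. The helper below reproduces it with hand-made batches.
def _q_target_sketch():
    import numpy as np
    discount_factor = 0.9                                   # example discount factor
    reward_batch = np.array([3.0, -1.0], dtype=np.float32)  # toy rewards
    done_batch = np.array([True, False])                    # the first transition is terminal
    q_values_next = np.array([[0.5, 1.2],                   # toy Q_target(s', a') values
                              [2.0, 0.1]], dtype=np.float32)
    targets_batch = reward_batch + np.invert(done_batch).astype(np.float32) \
        * discount_factor * np.amax(q_values_next, axis=1)
    # -> [3.0, -1.0 + 0.9 * 2.0] = [3.0, 0.8]
    return targets_batch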
def visualize_layers(model_name, add, layer_num):
    """
    Visualizes the activations of a network layer for a given image.

    Args:
        model_name: The model parameters that will be loaded for visualizing
        add: Path to an image
        layer_num: Layer number to be visualized
    """

    # Initiates the Tensorflow graph
    tf.reset_default_graph()

    # Where we save our checkpoints and graphs
    experiment_dir = os.path.abspath("../experiments/{}".format(model_name))

    # Create a global step variable
    global_step = tf.Variable(0, name='global_step', trainable=False)

    # Create the estimator
    q_estimator = Estimator(scope="q_estimator", summaries_dir=experiment_dir)

    # State processor
    state_processor = StateProcessor()

    with tf.Session() as sess:

        # For 'system/' summaries, useful to check whether the current process looks healthy
        current_process = psutil.Process()

        # Create directories for checkpoints and summaries
        checkpoint_dir = os.path.join(experiment_dir, "bestModel")
        checkpoint_path = os.path.join(checkpoint_dir, "model")

        # Initiates a saver and loads a previously saved model if one is found
        saver = tf.train.Saver()
        latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
        if latest_checkpoint:
            print("Loading model checkpoint {}...\n".format(latest_checkpoint))
            saver.restore(sess, latest_checkpoint)

        # The policy we're following
        policy = make_epsilon_greedy_policy(q_estimator, len(VALID_ACTIONS))

        # Creates an object localizer instance
        im2 = np.array(Image.open(add))
        env = ObjLocaliser(np.array(im2), {'xmin': [0], 'xmax': [1], 'ymin': [0], 'ymax': [1]})

        # Reset the environment
        env.Reset(np.array(im2))
        state = env.wrapping()
        state = state_processor.process(sess, state)
        state = np.stack([state] * 4, axis=2)

        # Visualizing the network layers
        layer = q_estimator.visulize_layers(sess, state.reshape((-1, 84, 84, 4)), layer_num)
        plotNNFilter(layer, model_name, layer_num)
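# Illustrative usage sketch for visualize_layers (the model name and image path are
# hypothetical placeholders): it loads the best checkpoint of the given model, feeds
# the stacked 84x84x4 state through the network, and plots the activations of the
# requested layer via plotNNFilter.
#
#   visualize_layers("my_model", "../data/example.jpg", layer_num=2)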