def __init__(self):
    """Build the embedding networks and the probability-generating mixers.

    Creates a text embedder, an image embedder and a previous-action
    embedder, then a network that mixes their outputs and generates
    probabilities over block-ids and direction.  For every entry in
    ``self.buckets`` an additional text embedder and mixing network are
    created, each handed the primary network through ``create_copy``.

    NOTE(review): ``image_dim`` and ``self.train_alg`` are read below but
    are not assigned anywhere in this method -- presumably a module-level
    name and a class-level attribute respectively; confirm.
    """
    # --- Text embedding -------------------------------------------------
    self.n_text = 250
    self.text_embedder = embed_token_seq.EmbedTokenSeq(self.n_text)
    text_embedding = self.text_embedder.get_output()

    # One text embedder per bucket; the bucket value is passed as num_steps.
    self.buckets = [15, 30, 45]
    self.embed_token_seq_buckets = [
        embed_token_seq.EmbedTokenSeq(
            self.n_text, num_steps=num_steps, create_copy=self.text_embedder)
        for num_steps in self.buckets
    ]

    # --- Image preprocessing and embedding ------------------------------
    self.image_preprocessor = image_preprocessing.ImagePreprocessing()
    self.n_image = 200
    self.image_embedder = embed_image.EmbedImage(self.n_image, image_dim)
    image_embedding = self.image_embedder.get_output()

    # --- Previous-action embedding --------------------------------------
    # Original author's note: 6 actions, one for no-action.
    self.n_status_flag_dim = 18
    self.n_direction_dim = 24
    self.n_previous_action_embedding = (
        self.n_status_flag_dim + self.n_direction_dim)
    self.null_previous_action = (2, 5)
    self.previous_action_embedder = epa.EmbedPreviousAction(
        3, self.n_status_flag_dim, 6, self.n_direction_dim)
    previous_action_embedding = self.previous_action_embedder.get_output()

    # --- Mixing network --------------------------------------------------
    # Softmax is enabled only for these three training algorithms.
    use_softmax = bool(
        self.train_alg == TrainingAlgorithm.SUPERVISEDMLE
        or self.train_alg == TrainingAlgorithm.REINFORCE
        or self.train_alg == TrainingAlgorithm.MIXER)

    self.mix_and_gen_prob = mix_and_gen_prob.MixAndGenerateProbabilities(
        self.n_text, self.n_image, self.n_previous_action_embedding,
        text_embedding, image_embedding, previous_action_embedding,
        5, use_softmax)

    # One mixing network per bucket, fed by the matching bucket text
    # embedder and sharing the image / previous-action embeddings.
    self.mix_and_gen_prob_buckets = [
        mix_and_gen_prob.MixAndGenerateProbabilities(
            self.n_text, self.n_image, self.n_previous_action_embedding,
            bucket_embedder.get_output(), image_embedding,
            previous_action_embedding, 5, use_softmax,
            create_copy=self.mix_and_gen_prob)
        for bucket_embedder in self.embed_token_seq_buckets
    ]
def __init__(self, image_dim, num_actions, time_horizon):
    """Build the embedders, the mixing networks and the graph inputs.

    Creates a text embedder, a recurrent image embedder and a
    previous-action embedder, mixes their outputs into probabilities over
    block-ids and direction, and defines the placeholders and summaries
    used when feeding the model.

    Args:
        image_dim: forwarded to ``EmbedImageRecurrent`` as its second
            argument.
        num_actions: stored on ``self.num_actions``.
        time_horizon: forwarded to ``EmbedImageRecurrent`` as its fourth
            argument.
    """
    self.num_actions = num_actions

    # --- Text embedding -------------------------------------------------
    self.n_text = 250
    self.text_embedder = embed_token_seq.EmbedTokenSeq(self.n_text)
    text_embedding = self.text_embedder.get_output()

    # One text embedder per bucket; the bucket value is passed as num_steps.
    self.buckets = [15, 30, 45]
    self.embed_token_seq_buckets = [
        embed_token_seq.EmbedTokenSeq(
            self.n_text, num_steps=num_steps, create_copy=self.text_embedder)
        for num_steps in self.buckets
    ]

    # --- Image preprocessing and embedding ------------------------------
    self.image_preprocessor = image_preprocessing.ImagePreprocessing()
    self.n_image = 200
    # self.n_image is deliberately passed twice (first and third argument),
    # exactly as the original call does.
    self.image_history_embedder = embed_image_recurrent.EmbedImageRecurrent(
        self.n_image, image_dim, self.n_image, time_horizon)
    image_embedding = self.image_history_embedder.get_output()

    # --- Previous-action embedding --------------------------------------
    # Original author's note: 6 actions, one for no-action.
    self.n_status_flag_dim = 18
    self.n_direction_dim = 24
    self.n_blocks_dim = 32
    self.n_previous_action_embedding = (
        self.n_status_flag_dim + self.n_direction_dim + self.n_blocks_dim)
    self.null_previous_action = (2, 5, 20)
    self.previous_action_embedder = epa.EmbedPreviousAction(
        3, self.n_status_flag_dim,
        6, self.n_direction_dim,
        21, self.n_blocks_dim)
    previous_action_embedding = self.previous_action_embedder.get_output()

    # --- Mixing network --------------------------------------------------
    use_softmax = True
    self.mix_and_gen_prob = mix_and_gen_prob.MixAndGenerateProbabilities(
        self.n_text, self.n_image, self.n_previous_action_embedding,
        text_embedding, image_embedding, previous_action_embedding,
        5, use_softmax)

    # One mixing network per bucket, fed by the matching bucket text
    # embedder and sharing the image / previous-action embeddings.
    self.mix_and_gen_prob_buckets = [
        mix_and_gen_prob.MixAndGenerateProbabilities(
            self.n_text, self.n_image, self.n_previous_action_embedding,
            bucket_embedder.get_output(), image_embedding,
            previous_action_embedding, 5, use_softmax,
            create_copy=self.mix_and_gen_prob)
        for bucket_embedder in self.embed_token_seq_buckets
    ]

    # --- Inputs and outputs ----------------------------------------------
    self.target = tf.placeholder(dtype=tf.float32, shape=None)
    self.block_indices = tf.placeholder(dtype=tf.int32, shape=None)
    self.direction_indices = tf.placeholder(dtype=tf.int32, shape=None)
    block_prob, direction_prob = \
        self.mix_and_gen_prob.get_joined_probabilities()
    self.model_output = block_prob, direction_prob
    self.model_output_indices = self.block_indices, self.direction_indices

    # Min / max / mean of the direction probabilities, merged into a
    # single summary op.
    direction_summaries = [
        tf.scalar_summary(tag, reduce_fn(direction_prob))
        for tag, reduce_fn in (
            ("Direction Prob Min", tf.reduce_min),
            ("Direction Prob Max", tf.reduce_max),
            ("Direction Prob Mean", tf.reduce_mean),
        )
    ]
    self.feed_forward_summary = tf.merge_summary(direction_summaries)
    self.feed_iter = 0
def __init__(self, n_text, image_dim, n_image, n_direction_dim, n_block_dim,
             scope_name="Q_network"):
    """Build the embedders, the Q-value mixing networks and graph inputs.

    Creates a text embedder, an image embedder and a previous-action
    embedder, mixes their outputs to generate Q values, and defines the
    placeholders and summaries used when feeding the model.  Every
    sub-network is constructed with the same ``scope_name``.

    Args:
        n_text: stored on ``self.n_text`` and passed to the text embedders
            and mixing networks.
        image_dim: forwarded to ``EmbedImage`` as its second argument.
        n_image: stored on ``self.n_image`` and passed to the image
            embedder and mixing networks.
        n_direction_dim: stored on ``self.n_direction_dim``.
        n_block_dim: stored on ``self.n_blocks_dim``.
        scope_name: forwarded as ``scope_name`` to every sub-network.
    """
    # --- Text embedding -------------------------------------------------
    self.n_text = n_text
    self.text_embedder = embed_token_seq.EmbedTokenSeq(
        self.n_text, scope_name=scope_name)
    text_embedding = self.text_embedder.get_output()

    # One text embedder per bucket; the bucket value is passed as num_steps.
    self.buckets = [15, 30, 45]
    self.embed_token_seq_buckets = [
        embed_token_seq.EmbedTokenSeq(
            self.n_text, num_steps=num_steps,
            create_copy=self.text_embedder, scope_name=scope_name)
        for num_steps in self.buckets
    ]

    # --- Image preprocessing and embedding ------------------------------
    self.image_preprocessor = image_preprocessing.ImagePreprocessing()
    self.n_image = n_image
    self.image_embedder = embed_image.EmbedImage(
        self.n_image, image_dim, scope_name=scope_name)
    image_embedding = self.image_embedder.get_output()

    # --- Previous-action embedding --------------------------------------
    # Original author's note: 6 actions, one for no-action.
    self.n_direction_dim = n_direction_dim
    self.n_blocks_dim = n_block_dim
    self.n_previous_action_embedding = (
        self.n_direction_dim + self.n_blocks_dim)
    self.null_previous_action = (5, 20)
    self.previous_action_embedder = epa.EmbedPreviousAction(
        6, self.n_direction_dim, 21, self.n_blocks_dim,
        scope_name=scope_name)
    previous_action_embedding = self.previous_action_embedder.get_output()

    # --- Q-value mixing network ------------------------------------------
    # Mixes the text, image and previous-action embeddings into Q values.
    self.mix_and_gen_q_val = mix_and_gen_q_values.MixAndGenerateQValues(
        self.n_text, self.n_image, self.n_previous_action_embedding,
        text_embedding, image_embedding, previous_action_embedding,
        81, scope_name=scope_name)

    # TODO BUG -- marker carried over from the original author; the
    # defect it refers to is not described.
    self.mix_and_gen_q_val_buckets = [
        mix_and_gen_q_values.MixAndGenerateQValues(
            self.n_text, self.n_image, self.n_previous_action_embedding,
            bucket_embedder.get_output(), image_embedding,
            previous_action_embedding, 81,
            create_copy=self.mix_and_gen_q_val, scope_name=scope_name)
        for bucket_embedder in self.embed_token_seq_buckets
    ]

    # --- Inputs and outputs ----------------------------------------------
    self.target = tf.placeholder(dtype=tf.float32, shape=None)
    self.model_output = self.mix_and_gen_q_val.get_q_val()
    self.model_output_indices = tf.placeholder(dtype=tf.int32, shape=None)

    # Min / max / mean of the Q values, merged into a single summary op.
    q_value_summaries = [
        tf.scalar_summary(tag, reduce_fn(self.model_output))
        for tag, reduce_fn in (
            ("Q Val Min", tf.reduce_min),
            ("Q Val Max", tf.reduce_max),
            ("Q Val Mean", tf.reduce_mean),
        )
    ]
    self.feed_forward_summary = tf.merge_summary(q_value_summaries)
    self.feed_iter = 0
def __init__(self, image_dim, num_actions, constants):
    """Build the embedders, the mixing networks and the graph inputs.

    Same overall construction as a fixed-size policy network, but the
    layer sizes and action counts are read from ``constants`` instead of
    being hard-coded.

    Args:
        image_dim: forwarded to ``EmbedImage`` as its second argument.
        num_actions: stored on ``self.num_actions``.
        constants: mapping read with the keys ``"text_hidden_dim"``,
            ``"image_hidden_dim"``, ``"num_block"``, ``"num_direction"``,
            ``"direction_dim"`` and ``"block_dim"``.
    """
    self.num_actions = num_actions

    # --- Text embedding -------------------------------------------------
    self.n_text = constants["text_hidden_dim"]
    self.text_embedder = embed_token_seq.EmbedTokenSeq(self.n_text)
    text_embedding = self.text_embedder.get_output()

    # One text embedder per bucket; the bucket value is passed as num_steps.
    self.buckets = [15, 30, 45]
    self.embed_token_seq_buckets = [
        embed_token_seq.EmbedTokenSeq(
            self.n_text, num_steps=num_steps, create_copy=self.text_embedder)
        for num_steps in self.buckets
    ]

    # --- Image preprocessing and embedding ------------------------------
    self.image_preprocessor = image_preprocessing.ImagePreprocessing()
    self.n_image = constants["image_hidden_dim"]
    self.image_embedder = embed_image.EmbedImage(self.n_image, image_dim)
    image_embedding = self.image_embedder.get_output()

    # --- Previous-action embedding --------------------------------------
    # Original author's note: "5? actions, one for no-action".
    num_blocks = constants["num_block"]
    self.num_directions = constants["num_direction"]
    self.n_direction_dim = constants["direction_dim"]
    self.n_blocks_dim = constants["block_dim"]
    self.n_previous_action_embedding = (
        self.n_direction_dim + self.n_blocks_dim)
    # The null previous action uses the last index of each vocabulary
    # declared to the embedder below (sizes num_directions + 2 and
    # num_blocks + 1).
    self.null_previous_action = (self.num_directions + 1, num_blocks)
    self.previous_action_embedder = epa.EmbedPreviousAction(
        self.num_directions + 2, self.n_direction_dim,
        num_blocks + 1, self.n_blocks_dim)
    previous_action_embedding = self.previous_action_embedder.get_output()

    # --- Mixing network --------------------------------------------------
    # Mixes the embeddings and generates probabilities over block-ids and
    # direction.
    use_softmax = True
    self.mix_and_gen_prob = mix_and_gen_prob.MixAndGenerateProbabilities(
        self.n_text, self.n_image, self.n_previous_action_embedding,
        text_embedding, image_embedding, previous_action_embedding,
        self.num_directions + 1, use_softmax)

    # One mixing network per bucket, fed by the matching bucket text
    # embedder and sharing the image / previous-action embeddings.
    self.mix_and_gen_prob_buckets = [
        mix_and_gen_prob.MixAndGenerateProbabilities(
            self.n_text, self.n_image, self.n_previous_action_embedding,
            bucket_embedder.get_output(), image_embedding,
            previous_action_embedding, self.num_directions + 1,
            use_softmax, create_copy=self.mix_and_gen_prob)
        for bucket_embedder in self.embed_token_seq_buckets
    ]

    # --- Inputs and outputs ----------------------------------------------
    self.target = tf.placeholder(dtype=tf.float32, shape=None)
    self.block_indices = tf.placeholder(dtype=tf.int32, shape=None)
    self.direction_indices = tf.placeholder(dtype=tf.int32, shape=None)
    block_prob, direction_prob = \
        self.mix_and_gen_prob.get_joined_probabilities()
    self.model_output = block_prob, direction_prob
    self.model_output_indices = self.block_indices, self.direction_indices

    # Min / max / mean of the direction probabilities, merged into a
    # single summary op.
    direction_summaries = [
        tf.scalar_summary(tag, reduce_fn(direction_prob))
        for tag, reduce_fn in (
            ("Direction Prob Min", tf.reduce_min),
            ("Direction Prob Max", tf.reduce_max),
            ("Direction Prob Mean", tf.reduce_mean),
        )
    ]
    self.feed_forward_summary = tf.merge_summary(direction_summaries)
    self.feed_iter = 0