def add_model(self, window):
    """Adds the 1-hidden-layer NN.

    Hint: Use a variable_scope (e.g. "Layer") for the first hidden layer, and another
          variable_scope (e.g. "Softmax") for the linear transformation preceding the softmax.
          Make sure to use the xavier_weight_init you defined in the previous part to
          initialize weights.
    Hint: Make sure to add in regularization and dropout to this network. Regularization
          should be an addition to the cost function, while dropout should be added after
          both variable scopes.
    Hint: You might consider using a tensorflow Graph Collection (e.g. "total_loss") to
          collect the regularization and loss terms (which you will add in add_loss_op below).
    Hint: Here are the dimensions of the various variables you will need to create
          W:  (window_size*embed_size, hidden_size)
          b1: (hidden_size,)
          U:  (hidden_size, label_size)
          b2: (label_size)

    https://www.tensorflow.org/versions/r0.7/api_docs/python/framework.html#graph-collections

    Args:
      window: tf.Tensor of shape (-1, window_size*embed_size)
    Returns:
      output: tf.Tensor of shape (batch_size, label_size)
    """
    ### YOUR CODE HERE
    with tf.variable_scope("layer") as layer_scope:
        W = tf.get_variable(
            "W_l",
            shape=[self.config.window_size * self.config.embed_size, self.config.hidden_size],
            initializer=xavier_weight_init())
        b1 = tf.get_variable(
            "b1", shape=[self.config.hidden_size],
            initializer=tf.constant_initializer(0.0))
        variable_summaries(W, W.name)
        variable_summaries(b1, b1.name)

    with tf.variable_scope("hidden_layer") as hidden_layer:
        U = tf.get_variable(
            "U_h",
            shape=[self.config.hidden_size, self.config.label_size],
            initializer=xavier_weight_init())
        b2 = tf.get_variable(
            "b2", shape=[self.config.label_size],
            initializer=tf.constant_initializer(0.0))
        variable_summaries(U, U.name)
        variable_summaries(b2, b2.name)

    h_fc1 = tf.nn.relu(tf.matmul(window, W) + b1)
    h_fc1 = tf.nn.dropout(h_fc1, self.dropout_placeholder)

    h_fc2 = tf.matmul(h_fc1, U) + b2
    h_fc2 = tf.nn.dropout(h_fc2, self.dropout_placeholder)

    l2_loss = tf.nn.l2_loss(W) + tf.nn.l2_loss(b1) + tf.nn.l2_loss(U) + tf.nn.l2_loss(b2)
    tf.add_to_collection(name="l2_loss", value=l2_loss)

    output = h_fc2
    ### END YOUR CODE
    return output
def add_model(self, window):
    """Adds the 1-hidden-layer NN.

    Hint: Use a variable_scope (e.g. "Layer") for the first hidden layer, and another
          variable_scope (e.g. "Softmax") for the linear transformation preceding the softmax.
          Make sure to use the xavier_weight_init you defined in the previous part to
          initialize weights.
    Hint: Make sure to add in regularization and dropout to this network. Regularization
          should be an addition to the cost function, while dropout should be added after
          both variable scopes.
    Hint: You might consider using a tensorflow Graph Collection (e.g. "total_loss") to
          collect the regularization and loss terms (which you will add in add_loss_op below).
    Hint: Here are the dimensions of the various variables you will need to create
          W:  (window_size*embed_size, hidden_size)
          b1: (hidden_size,)
          U:  (hidden_size, label_size)
          b2: (label_size)

    https://www.tensorflow.org/versions/r0.7/api_docs/python/framework.html#graph-collections

    Args:
      window: tf.Tensor of shape (-1, window_size*embed_size)
    Returns:
      output: tf.Tensor of shape (batch_size, label_size)
    """
    ### YOUR CODE HERE
    with tf.variable_scope('Layer'):
        dim_size = self.config.window_size * self.config.embed_size
        W = tf.get_variable('W', (dim_size, self.config.hidden_size),
                            initializer=xavier_weight_init())
        b1 = tf.get_variable('b1', (self.config.hidden_size,),
                             initializer=xavier_weight_init())
        reg_W = 0.5 * self.config.l2 * tf.nn.l2_loss(W)
        tf.add_to_collection('total_loss', reg_W)
        layer = tf.matmul(window, W) + b1
        layer = tf.nn.dropout(layer, self.dropout_placeholder)

    with tf.variable_scope('Softmax'):
        U = tf.get_variable('U', (self.config.hidden_size, self.config.label_size),
                            initializer=xavier_weight_init())
        b2 = tf.get_variable('b2', (self.config.label_size,),
                             initializer=xavier_weight_init())
        reg_U = 0.5 * self.config.l2 * tf.nn.l2_loss(U)
        tf.add_to_collection('total_loss', reg_U)
        output = tf.nn.softmax(tf.matmul(layer, U) + b2)
        output = tf.nn.dropout(output, self.dropout_placeholder)
    ### END YOUR CODE
    return output
def add_model(self, window):
    """Adds the 1-hidden-layer NN.

    Hint: Use a variable_scope (e.g. "Layer") for the first hidden layer, and another
          variable_scope (e.g. "Softmax") for the linear transformation preceding the softmax.
          Make sure to use the xavier_weight_init you defined in the previous part to
          initialize weights.
    Hint: Make sure to add in regularization and dropout to this network. Regularization
          should be an addition to the cost function, while dropout should be added after
          both variable scopes.
    Hint: You might consider using a tensorflow Graph Collection (e.g. "total_loss") to
          collect the regularization and loss terms (which you will add in add_loss_op below).
    Hint: Here are the dimensions of the various variables you will need to create
          W:  (window_size*embed_size, hidden_size)
          b1: (hidden_size,)
          U:  (hidden_size, label_size)
          b2: (label_size)

    https://www.tensorflow.org/versions/r0.7/api_docs/python/framework.html#graph-collections

    Args:
      window: tf.Tensor of shape (-1, window_size*embed_size)
    Returns:
      output: tf.Tensor of shape (batch_size, label_size)
    """
    ### YOUR CODE HERE
    with tf.variable_scope("Layer1", initializer=xavier_weight_init()) as scope:
        w_dim = self.config.window_size * self.config.embed_size
        w_shape = (w_dim, self.config.hidden_size)
        W = tf.get_variable("W", w_shape)
        b1_shape = (self.config.hidden_size,)
        b1 = tf.Variable(tf.zeros(b1_shape), name="b1")
        # b1 = tf.get_variable("b1", b1_shape)
        out_layer1 = tf.tanh(tf.matmul(window, W) + b1)
        out_layer1_drop = tf.nn.dropout(out_layer1, self.dropout_placeholder)

    # Linear transformation preceding the softmax.
    with tf.variable_scope("Softmax", initializer=xavier_weight_init()) as scope:
        u_dim = (self.config.hidden_size, self.config.label_size)
        U = tf.get_variable("U", u_dim)
        b2_shape = (self.config.label_size,)
        # b2 = tf.get_variable("b2", b2_shape)
        b2 = tf.Variable(tf.zeros(b2_shape), name="b2")
        output = tf.matmul(out_layer1_drop, U) + b2

    l2_loss = self.config.lr * (tf.nn.l2_loss(W) + tf.nn.l2_loss(U))
    tf.add_to_collection("total_loss", l2_loss)
    ### END YOUR CODE
    return output
def add_projection(self, rnn_outputs):
    """Adds a projection layer.

    The projection layer transforms the hidden representation to a distribution over the
    vocabulary.

    Hint: Here are the dimensions of the variables you will need to create
          U:   (hidden_size, len(vocab))
          b_2: (len(vocab),)

    Args:
      rnn_outputs: List of length num_steps, each of whose elements should be a tensor of
                   shape (batch_size, hidden_size).
    Returns:
      outputs: List of length num_steps, each a tensor of shape (batch_size, len(vocab))
    """
    ### YOUR CODE HERE
    with tf.variable_scope("projection", initializer=xavier_weight_init(), reuse=None):
        U = tf.get_variable("U", shape=(self.config.hidden_size, len(self.vocab)))
        b2 = tf.get_variable("b2", shape=(len(self.vocab),))
        outputs = [tf.matmul(ts, U) + b2 for ts in rnn_outputs]
    ### END YOUR CODE
    return outputs
def add_model(self, inputs):
    """Creates the RNN LM model.

    In the space provided below, you need to implement the equations for the RNN LM model.
    Note that you may NOT use built in rnn_cell functions from tensorflow.

    Hint: Use a zeros tensor of shape (batch_size, hidden_size) as initial state for the RNN.
          Add this to self as instance variable self.initial_state (Don't change variable name)
    Hint: Add the last RNN output to self as instance variable self.final_state
          (Don't change variable name)
    Hint: Make sure to apply dropout to the inputs and the outputs.
    Hint: Use a variable scope (e.g. "RNN") to define RNN variables.
    Hint: Perform an explicit for-loop over inputs. You can use scope.reuse_variables() to
          ensure that the weights used at each iteration (each time-step) are the same.
          (Make sure you don't call this for iteration 0 though or nothing will be initialized!)
    Hint: Here are the dimensions of the various variables you will need to create:
          H:   (hidden_size, hidden_size)
          I:   (embed_size, hidden_size)
          b_1: (hidden_size,)

    Args:
      inputs: List of length num_steps, each of whose elements should be a tensor of
              shape (batch_size, embed_size).
    Returns:
      outputs: List of length num_steps, each of whose elements should be a tensor of
               shape (batch_size, hidden_size)
    """
    ### YOUR CODE HERE
    rnn_outputs = []
    self.initial_state = tf.zeros([self.config.batch_size, self.config.hidden_size])
    with tf.variable_scope("RNN", initializer=xavier_weight_init(), reuse=None):
        H = tf.get_variable("H", shape=(self.config.hidden_size, self.config.hidden_size))
        I = tf.get_variable("I", shape=(self.config.embed_size, self.config.hidden_size))
        b1 = tf.get_variable("b1", shape=(self.config.hidden_size,))

    prev_h = self.initial_state
    for step_input in inputs:
        step_input = tf.nn.dropout(step_input, self.dropout_placeholder)
        prev_h = tf.sigmoid(tf.matmul(prev_h, H) + tf.matmul(step_input, I) + b1)
        # prev_h = tf.nn.dropout(prev_h, self.dropout_placeholder)
        rnn_outputs.append(prev_h)

    self.final_state = prev_h
    ### END YOUR CODE
    return rnn_outputs
def add_prediction_op(self):
    """Adds the 1-hidden-layer NN:
        h = Relu(xW + b1)
        h_drop = Dropout(h, dropout_rate)
        pred = h_dropU + b2

    Note that we are not applying a softmax to pred. The softmax will instead be done in
    the add_loss_op function, which improves efficiency because we can use
    tf.nn.softmax_cross_entropy_with_logits

    Use the initializer from q2_initialization.py to initialize W and U (you can initialize
    b1 and b2 with zeros)

    Hint: Here are the dimensions of the various variables you will need to create
          W:  (n_features*embed_size, hidden_size)
          b1: (hidden_size,)
          U:  (hidden_size, n_classes)
          b2: (n_classes)
    Hint: Note that tf.nn.dropout takes the keep probability (1 - p_drop) as an argument.
          The keep probability should be set to the value of self.dropout_placeholder

    Returns:
      pred: tf.Tensor of shape (batch_size, n_classes)
    """
    x = self.add_embedding()
    ### YOUR CODE HERE
    dropout_rate = self.dropout_placeholder
    W = tf.get_variable(
        name="W",
        shape=[self.config.n_features * self.config.embed_size, self.config.hidden_size],
        initializer=xavier_weight_init())  # tf.contrib.layers.xavier_initializer()
    b1 = tf.Variable(tf.zeros([self.config.hidden_size]))
    h = tf.nn.relu(tf.matmul(x, W) + b1)
    h_drop = tf.nn.dropout(h, keep_prob=dropout_rate)

    U = tf.get_variable(
        name="U",
        shape=[self.config.hidden_size, self.config.n_classes],
        initializer=xavier_weight_init())  # tf.contrib.layers.xavier_initializer()
    b2 = tf.Variable(tf.zeros([self.config.n_classes]))
    pred = tf.matmul(h_drop, U) + b2
    ### END YOUR CODE
    return pred
def add_prediction_op(self):
    """Adds the 1-hidden-layer NN:
        h = Relu(xW + b1)
        h_drop = Dropout(h, dropout_rate)
        pred = h_dropU + b2

    Note that we are not applying a softmax to pred. The softmax will instead be done in
    the add_loss_op function, which improves efficiency because we can use
    tf.nn.softmax_cross_entropy_with_logits

    Use the initializer from q2_initialization.py to initialize W and U (you can initialize
    b1 and b2 with zeros)

    Here are the sizes of the variables:
        W:  (n_features * embed_size, hidden_size)
        b1: (hidden_size,)
        U:  (hidden_size, n_classes)
        b2: (n_classes)

    Hint: Note that tf.nn.dropout takes the keep probability (1 - p_drop) as an argument.
          Therefore the keep probability should be set to the value of
          (1 - self.dropout_placeholder)

    Returns:
      pred: tf.Tensor of shape (batch_size, n_classes)
    """
    x = self.add_embedding()
    n_features = self.config.n_features
    embed_size = self.config.embed_size
    hidden_size = self.config.hidden_size
    n_classes = self.config.n_classes
    dropout = self.dropout_placeholder
    xavier = xavier_weight_init()
    ### YOUR CODE HERE
    with tf.variable_scope('transformation'):
        W = xavier([n_features * embed_size, hidden_size])
        self.W = W
        b1 = tf.Variable(tf.random_uniform([hidden_size, ]))
        U = xavier([hidden_size, n_classes])
        b2 = tf.Variable(tf.random_uniform([n_classes]))
        h = tf.nn.relu(tf.matmul(x, W) + b1)
        h_drop = tf.nn.dropout(h, 1 - dropout)
        pred = tf.matmul(h_drop, U) + b2
    ### END YOUR CODE
    return pred
def add_prediction_op(self):
    """Adds the 1-hidden-layer NN:
        h = Relu(xW + b1)
        h_drop = Dropout(h, dropout_rate)
        pred = h_dropU + b2

    Note that we are not applying a softmax to pred. The softmax will instead be done in
    the add_loss_op function, which improves efficiency because we can use
    tf.nn.softmax_cross_entropy_with_logits

    Use the initializer from q2_initialization.py to initialize W and U (you can initialize
    b1 and b2 with zeros)

    Hint: Note that tf.nn.dropout takes the keep probability (1 - p_drop) as an argument.
          Therefore the keep probability should be set to the value of
          (1 - self.dropout_placeholder)

    Returns:
      pred: tf.Tensor of shape (batch_size, n_classes)
    """
    x = self.add_embedding()
    ### YOUR CODE HERE
    xavier_initializer = xavier_weight_init()

    # hidden layer
    Weight_1 = tf.Variable(
        xavier_initializer((self.config.embed_size * self.config.n_features,
                            self.config.hidden_size)),
        name="Weight_1")
    bias_1 = tf.Variable(tf.zeros(self.config.hidden_size), dtype=tf.float32, name="bias_1")
    hidden_out = tf.nn.relu(tf.matmul(x, Weight_1) + bias_1, name="hidden_out")
    hidden_drop = tf.nn.dropout(hidden_out,
                                keep_prob=1 - self.dropout_placeholder,
                                name="hidden_drop")

    # output layer
    Weight_2 = tf.Variable(
        xavier_initializer((self.config.hidden_size, self.config.n_classes)),
        name="Weight_2")
    bias_2 = tf.Variable(tf.zeros(self.config.n_classes), dtype=tf.float32, name="bias_2")
    pred = tf.add(tf.matmul(hidden_drop, Weight_2), bias_2, name="pred")
    ### END YOUR CODE
    return pred
def add_model_vars(self):
    '''Your model contains the following parameters:

        embedding: tensor(vocab_size, embed_size)
        W1:        tensor(2 * embed_size, embed_size)
        b1:        tensor(1, embed_size)
        U:         tensor(embed_size, output_size)
        bs:        tensor(1, output_size)

    Hint: Add the tensorflow variables to the graph here and *reuse* them while building
          the computation graphs for composition and projection for each tree.
    Hint: Use a variable_scope "Composition" for the composition layer, and "Projection"
          for the linear transformations preceding the softmax.
    '''
    with tf.variable_scope('Composition'):
        ### YOUR CODE HERE
        self.embedding = tf.get_variable(
            'embedding', [len(self.vocab), self.config.embed_size],
            tf.float32, xavier_weight_init())
        # tf.contrib.layers.l2_regularizer(self.config.l2))
        self.W1 = tf.get_variable(
            'W1', [2 * self.config.embed_size, self.config.embed_size],
            tf.float32, xavier_weight_init(),
            tf.contrib.layers.l2_regularizer(self.config.l2))
        self.b1 = tf.get_variable('b1', [1, self.config.embed_size],
                                  tf.float32, xavier_weight_init())
        ### END YOUR CODE
    with tf.variable_scope('Projection'):
        ### YOUR CODE HERE
        self.U = tf.get_variable(
            'U', [self.config.embed_size, self.config.label_size],
            tf.float32, xavier_weight_init(),
            tf.contrib.layers.l2_regularizer(self.config.l2))
        self.bs = tf.get_variable('bs', [1, self.config.label_size],
                                  tf.float32, xavier_weight_init())
def add_model(self, window):
    """Adds the 1-hidden-layer NN.

    Hint: Use a variable_scope (e.g. "Layer") for the first hidden layer, and another
          variable_scope (e.g. "Softmax") for the linear transformation preceding the softmax.
          Make sure to use the xavier_weight_init you defined in the previous part to
          initialize weights.
    Hint: Make sure to add in regularization and dropout to this network. Regularization
          should be an addition to the cost function, while dropout should be added after
          both variable scopes.
    Hint: You might consider using a tensorflow Graph Collection (e.g. "total_loss") to
          collect the regularization and loss terms (which you will add in add_loss_op below).
    Hint: Here are the dimensions of the various variables you will need to create
          W:  (window_size*embed_size, hidden_size)
          b1: (hidden_size,)
          U:  (hidden_size, label_size)
          b2: (label_size)

    https://www.tensorflow.org/versions/r0.7/api_docs/python/framework.html#graph-collections

    Args:
      window: tf.Tensor of shape (-1, window_size*embed_size)
    Returns:
      output: tf.Tensor of shape (batch_size, label_size)
    """
    ### YOUR CODE HERE
    xavier_initializer = xavier_weight_init()
    window_size = self.config.window_size
    embed_size = self.config.embed_size
    hidden_size = self.config.hidden_size
    label_size = self.config.label_size

    with tf.variable_scope("Layer"):
        shape = (window_size * embed_size, hidden_size)
        # W = tf.get_variable("W", shape, initializer=xavier_initializer(shape))
        W = tf.get_variable("W", shape, initializer=xavier_initializer)
        b1 = tf.get_variable("b1", (hidden_size,), initializer=tf.constant_initializer(0.0))

    with tf.variable_scope("Softmax"):
        # U = tf.get_variable("U", (hidden_size, label_size), xavier_initializer((hidden_size, label_size)))
        U = tf.get_variable("U", (hidden_size, label_size), initializer=xavier_initializer)
        b2 = tf.get_variable("b2", (label_size,), initializer=tf.constant_initializer(0.0))

    drop = self.dropout_placeholder
    hidden = tf.nn.dropout(tf.nn.sigmoid(tf.matmul(window, W) + b1), drop)
    output = tf.nn.dropout(tf.matmul(hidden, U) + b2, drop)
    ### END YOUR CODE
    return output
def add_prediction_op(self):
    """Adds the 1-hidden-layer NN:
        h = Relu(xW + b1)
        h_drop = Dropout(h, dropout_rate)
        pred = h_dropU + b2

    Note that we are not applying a softmax to pred. The softmax will instead be done in
    the add_loss_op function, which improves efficiency because we can use
    tf.nn.softmax_cross_entropy_with_logits

    Use the initializer from q2_initialization.py to initialize W and U (you can initialize
    b1 and b2 with zeros)

    Hint: Note that tf.nn.dropout takes the keep probability (1 - p_drop) as an argument.
          Therefore the keep probability should be set to the value of
          (1 - self.dropout_placeholder)

    Returns:
      pred: tf.Tensor of shape (batch_size, n_classes)
    """
    x = self.add_embedding()
    ### YOUR CODE HERE
    n_features = self.config.n_features
    embed_size = self.config.embed_size
    feature_size = n_features * embed_size
    hidden_size = self.config.hidden_size
    num_class = self.config.n_classes

    xavier_initializer = xavier_weight_init()
    self.W = tf.get_variable('q2_parser_W', shape=[feature_size, hidden_size],
                             dtype=tf.float32, initializer=xavier_initializer)
    self.b1 = tf.get_variable('q2_parser_b1', shape=[hidden_size],
                              dtype=tf.float32, initializer=xavier_initializer)
    self.U = tf.get_variable('q2_parser_U', shape=[hidden_size, num_class],
                             dtype=tf.float32, initializer=xavier_initializer)
    self.b2 = tf.get_variable('q2_parser_b2', shape=[num_class],
                              dtype=tf.float32, initializer=xavier_initializer)

    # Remember to include the bias (intercept) terms.
    h = tf.nn.relu(tf.matmul(x, self.W) + self.b1)
    h_drop = tf.nn.dropout(h, 1 - self.dropout_placeholder)
    pred = tf.matmul(h_drop, self.U) + self.b2
    ### END YOUR CODE
    return pred
def add_prediction_op(self):
    """Adds the 1-hidden-layer NN:
        h = Relu(xW + b1)
        h_drop = Dropout(h, dropout_rate)
        pred = h_dropU + b2

    Note that we are not applying a softmax to pred. The softmax will instead be done in
    the add_loss_op function, which improves efficiency because we can use
    tf.nn.softmax_cross_entropy_with_logits

    Use the initializer from q2_initialization.py to initialize W and U (you can initialize
    b1 and b2 with zeros)

    Hint: Note that tf.nn.dropout takes the keep probability (1 - p_drop) as an argument.
          Therefore the keep probability should be set to the value of
          (1 - self.dropout_placeholder)

    Returns:
      pred: tf.Tensor of shape (batch_size, n_classes)
    """
    x = self.add_embedding()
    l2_regularizer = layers.l2_regularizer(self.config.l2)
    ### YOUR CODE HERE
    with tf.variable_scope("prediction_op"):
        xavier_initializer = xavier_weight_init()
        W = tf.get_variable(
            "W",
            [self.config.n_features * self.config.embed_size, self.config.hidden_size],
            initializer=xavier_initializer,
            regularizer=l2_regularizer)
        b1 = tf.get_variable("b1", [1, self.config.hidden_size],
                             initializer=xavier_initializer,
                             regularizer=l2_regularizer)
        h = tf.nn.relu(tf.add(tf.matmul(x, W), b1))
        h_drop = tf.nn.dropout(h, self.dropout_placeholder)
        U = tf.get_variable("U",
                            [self.config.hidden_size, self.config.n_classes],
                            initializer=xavier_initializer,
                            regularizer=l2_regularizer)
        b2 = tf.get_variable("b2", [1, self.config.n_classes],
                             initializer=xavier_initializer,
                             regularizer=l2_regularizer)
        pred = tf.add(tf.matmul(h_drop, U), b2)
    ### END YOUR CODE
    return pred
def add_model(self):
    embed_size = self.config.embed_size
    num_speakers = self.config.speaker_count
    self.embedded_lines = tf.gather(self.tf_embedding_matrix, self.lines_placeholder)
    sentence_summary_size = self.add_sentence_summaries(embed_size)
    conversation_state_size = self.add_conversational_context(sentence_summary_size)

    with tf.variable_scope("linear_softmax"):
        W = tf.get_variable("weights", (conversation_state_size, num_speakers),
                            initializer=xavier_weight_init())
        b = tf.get_variable("biases", (num_speakers,))

    return tf.nn.dropout(tf.matmul(self.conversation_state, W) + b,
                         self.dropout_placeholder)  # logits
def init_parameters(self):
    """Set up parameters.

    Use the initializer from q2_initialization.py to initialize W and U
    (you can initialize b1 and b2 with zeros)
    """
    ### YOUR CODE HERE
    zeroInit = dy.ConstInitializer(0.0)
    xavier = xavier_weight_init()
    self._pW = self.M.add_parameters(
        (self.config.n_features * self.config.embed_size, self.config.hidden_size))
    self._pB1 = self.M.add_parameters((1, self.config.hidden_size), init=zeroInit)
    self._pU = self.M.add_parameters((self.config.hidden_size, self.config.n_classes))
    self._pB2 = self.M.add_parameters((1, self.config.n_classes), init=zeroInit)
    self.word_dict = self.M.lookup_parameters_from_numpy(self.pretrained_embeddings)
def __init__(self, config):
    """Constructs the network using the helper functions defined above."""
    self.xavier_initializer = xavier_weight_init()
    self.config = config
    self.load_data(debug=self.config.debug)
    self.add_placeholders()
    window = self.add_embedding()
    y = self.add_model(window)

    self.loss = self.add_loss_op(y)
    self.predictions = tf.nn.softmax(y)
    one_hot_prediction = tf.argmax(self.predictions, 1)
    correct_prediction = tf.equal(
        tf.argmax(self.labels_placeholder, 1), one_hot_prediction)
    self.correct_predictions = tf.reduce_sum(tf.cast(correct_prediction, 'int32'))
    self.train_op = self.add_training_op(self.loss)
def add_prediction_op(self):
    """Adds the 1-hidden-layer NN:
        h = Relu(xW + b1)
        h_drop = Dropout(h, dropout_rate)
        pred = h_dropU + b2

    Note that we are not applying a softmax to pred. The softmax will instead be done in
    the add_loss_op function, which improves efficiency because we can use
    tf.nn.softmax_cross_entropy_with_logits

    Use the initializer from q2_initialization.py to initialize W and U (you can initialize
    b1 and b2 with zeros)

    Hint: Note that tf.nn.dropout takes the keep probability (1 - p_drop) as an argument.
          Therefore the keep probability should be set to the value of
          (1 - self.dropout_placeholder)

    Returns:
      pred: tf.Tensor of shape (batch_size, n_classes)
    """
    x = self.add_embedding()
    ### YOUR CODE HERE
    xavier = xavier_weight_init()

    # self.W = W = tf.Variable(xavier((self.config.n_features * self.config.embed_size, self.config.hidden_size)))
    # self.U = U = tf.Variable(xavier((self.config.hidden_size, self.config.n_classes)))
    # self.b1 = b1 = tf.Variable(tf.zeros(self.config.hidden_size), name='b1')
    # self.b2 = b2 = tf.Variable(tf.zeros(self.config.n_classes), name='b2')
    #
    # h = tf.nn.relu(tf.matmul(x, W) + b1)
    # h_drop = tf.nn.dropout(h, 1 - self.dropout_placeholder)
    # pred = tf.matmul(h_drop, U) + b2

    with tf.variable_scope('transformation'):
        self.W = W = xavier(
            (self.config.n_features * self.config.embed_size, self.config.hidden_size))
        U = xavier((self.config.hidden_size, self.config.n_classes))
        b1 = tf.Variable(tf.random_uniform((self.config.hidden_size,)))
        b2 = tf.Variable(tf.random_uniform((self.config.n_classes,)))
        h = tf.nn.relu(tf.matmul(x, W) + b1)
        # dropout = tf.to_float(tf.random_uniform((self.config.hidden_size,), 0, 1) >= self.dropout_placeholder)
        # h_drop = dropout * h
        h_drop = tf.nn.dropout(h, 1 - self.dropout_placeholder)
        pred = tf.matmul(h_drop, U) + b2
    ### END YOUR CODE
    return pred
def add_model(self, window):
    """Adds the 1-hidden-layer NN.

    Hint: Use a variable_scope (e.g. "Layer") for the first hidden layer, and another
          variable_scope (e.g. "Softmax") for the linear transformation preceding the softmax.
          Make sure to use the xavier_weight_init you defined in the previous part to
          initialize weights.
    Hint: Make sure to add in regularization and dropout to this network. Regularization
          should be an addition to the cost function, while dropout should be added after
          both variable scopes.
    Hint: You might consider using a tensorflow Graph Collection (e.g. "total_loss") to
          collect the regularization and loss terms (which you will add in add_loss_op below).
    Hint: Here are the dimensions of the various variables you will need to create
          W:  (window_size*embed_size, hidden_size)
          b1: (hidden_size,)
          U:  (hidden_size, label_size)
          b2: (label_size)

    https://www.tensorflow.org/versions/r0.7/api_docs/python/framework.html#graph-collections

    Args:
      window: tf.Tensor of shape (-1, window_size*embed_size)
    Returns:
      output: tf.Tensor of shape (batch_size, label_size)
    """
    ### YOUR CODE HERE
    xavier_initializer = xavier_weight_init()
    with tf.variable_scope('Layer1') as varscope:
        W = xavier_initializer(
            (self.config.window_size * self.config.embed_size, self.config.hidden_size),
            varname="W")
        b1 = tf.Variable(tf.zeros([self.config.hidden_size], dtype=tf.float32), name="b1")
    with tf.variable_scope('Softmax') as varscope2:
        U = xavier_initializer((self.config.hidden_size, self.config.label_size), varname="U")
        b2 = tf.Variable(tf.zeros([self.config.label_size], dtype=tf.float32), name="b2")

    h = tf.tanh(tf.matmul(window, W) + b1)
    dropout_h = tf.nn.dropout(h, self.dropout_placeholder)
    output = tf.matmul(dropout_h, U) + b2
    dropout_output = tf.nn.dropout(output, self.dropout_placeholder)

    # add regularization loss
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                         self.config.l2 * tf.nn.l2_loss(W) + self.config.l2 * tf.nn.l2_loss(U))
    ### END YOUR CODE
    return dropout_output
def predict(self):
    xavier_initializer = xavier_weight_init()
    # TODO: create architecture

    # First layer
    W1 = tf.Variable(xavier_initializer((self.P * self.n, self.h1)))
    b1 = tf.Variable(tf.zeros([self.h1]))
    W2 = tf.Variable(xavier_initializer((self.h1, self.h2)))
    b2 = tf.Variable(tf.zeros([self.h2]))
    W3 = tf.Variable(xavier_initializer((self.h2, 2)))

    layer1 = tf.nn.relu(tf.matmul(self.input_placeholders, W1) + b1)
    layer2 = tf.nn.relu(tf.matmul(layer1, W2) + b2)
    self.preds = tf.matmul(layer2, W3)
    return self.preds
def add_prediction_op(self):
    """Adds the 1-hidden-layer NN:
        h = Relu(xW + b1)
        h_drop = Dropout(h, dropout_rate)
        pred = h_dropU + b2

    Note that we are not applying a softmax to pred. The softmax will instead be done in
    the add_loss_op function, which improves efficiency because we can use
    tf.nn.softmax_cross_entropy_with_logits

    Use the initializer from q2_initialization.py to initialize W and U (you can initialize
    b1 and b2 with zeros)

    Hint: Here are the dimensions of the various variables you will need to create
          W:  (n_features*embed_size, hidden_size)
          b1: (hidden_size,)
          U:  (hidden_size, n_classes)
          b2: (n_classes)
    Hint: Note that tf.nn.dropout takes the keep probability (1 - p_drop) as an argument.
          The keep probability should be set to the value of self.dropout_placeholder

    Returns:
      pred: tf.Tensor of shape (batch_size, n_classes)
    """
    x = self.add_embedding()
    ### YOUR CODE HERE
    xavier_initializer = xavier_weight_init()

    w_shape = [self.config.n_features * self.config.embed_size, self.config.hidden_size]
    w = tf.get_variable(name='w', initializer=xavier_initializer(w_shape))
    b1_shape = [1, self.config.hidden_size]
    b1 = tf.get_variable(name='b1', shape=b1_shape, initializer=tf.zeros_initializer())

    u_shape = [self.config.hidden_size, self.config.n_classes]
    u = tf.get_variable(name='u', initializer=xavier_initializer(u_shape))
    b2_shape = [1, self.config.n_classes]
    b2 = tf.get_variable(name='b2', shape=b2_shape, initializer=tf.zeros_initializer())

    h = tf.nn.relu(tf.add(tf.matmul(x, w), b1))
    h_drop = tf.nn.dropout(h, keep_prob=self.dropout_placeholder)
    pred = tf.add(tf.matmul(h_drop, u), b2)
    ### END YOUR CODE
    return pred
def add_model(self, window):
    """Adds the 1-hidden-layer NN.

    Hint: Use a variable_scope (e.g. "Layer") for the first hidden layer, and another
          variable_scope (e.g. "Softmax") for the linear transformation preceding the softmax.
          Make sure to use the xavier_weight_init you defined in the previous part to
          initialize weights.
    Hint: Make sure to add in regularization and dropout to this network. Regularization
          should be an addition to the cost function, while dropout should be added after
          both variable scopes.
    Hint: You might consider using a tensorflow Graph Collection (e.g. "total_loss") to
          collect the regularization and loss terms (which you will add in add_loss_op below).
    Hint: Here are the dimensions of the various variables you will need to create
          W:  (window_size*embed_size, hidden_size)
          b1: (hidden_size,)
          U:  (hidden_size, label_size)
          b2: (label_size)

    https://www.tensorflow.org/versions/r0.7/api_docs/python/framework.html#graph-collections

    Args:
      window: tf.Tensor of shape (-1, window_size*embed_size)
    Returns:
      output: tf.Tensor of shape (batch_size, label_size)
    """
    ### YOUR CODE HERE
    initializer = xavier_weight_init()
    with tf.name_scope("Layer"):
        w = tf.Variable(
            initializer((self.config.window_size * self.config.embed_size,
                         self.config.hidden_size)))
        b1 = tf.Variable(initializer((self.config.hidden_size,)))
    with tf.name_scope("Softmax"):
        u = tf.Variable(
            initializer((self.config.hidden_size, self.config.label_size)))
        b2 = tf.Variable(initializer((self.config.label_size,)))

    h1 = tf.add(tf.matmul(window, w), b1)
    h2 = tf.add(tf.matmul(h1, u), b2)
    output = h2
    ### END YOUR CODE
    return output
def add_projection(self, rnn_outputs):
    """Adds a projection layer.

    The projection layer transforms the hidden representation to a distribution over the
    vocabulary.

    Hint: Here are the dimensions of the variables you will need to create
          U:   (hidden_size, len(vocab))
          b_2: (len(vocab),)

    Args:
      rnn_outputs: List of length num_steps, each of whose elements should be a tensor of
                   shape (batch_size, hidden_size).
    Returns:
      outputs: List of length num_steps, each a tensor of shape (batch_size, len(vocab))
    """
    ### YOUR CODE HERE
    # raise NotImplementedError
    # The code here is inspired by / taken from:
    # https://github.com/vijayvee/CS224d_Assignment_2_Solutions
    shapeU = [self.config.hidden_size, len(self.vocab)]
    shapeB2 = [len(self.vocab)]
    xavier_initializer = xavier_weight_init()
    outputs = []
    with tf.variable_scope("Project"):
        U = tf.get_variable("uWeights", shape=shapeU, initializer=xavier_initializer)
        b_2 = tf.get_variable("bias2", shape=shapeB2, initializer=xavier_initializer)
        # For each of the num_steps rnn_outputs, compute the output via U and b_2.
        for i in range(self.config.num_steps):
            outTensor = tf.matmul(rnn_outputs[i], U) + b_2
            outputs.append(outTensor)
    ### END YOUR CODE
    return outputs
def add_model(self, window):
    """Adds the 1-hidden-layer NN.

    Hint: Use a variable_scope (e.g. "Layer") for the first hidden layer, and another
          variable_scope (e.g. "Softmax") for the linear transformation preceding the softmax.
          Make sure to use the xavier_weight_init you defined in the previous part to
          initialize weights.
    Hint: Make sure to add in regularization and dropout to this network. Regularization
          should be an addition to the cost function, while dropout should be added after
          both variable scopes.
    Hint: You might consider using a tensorflow Graph Collection (e.g. "total_loss") to
          collect the regularization and loss terms (which you will add in add_loss_op below).
    Hint: Here are the dimensions of the various variables you will need to create
          W:  (window_size*embed_size, hidden_size)
          b1: (hidden_size,)
          U:  (hidden_size, num_domains)
          b2: (num_domains)

    https://www.tensorflow.org/versions/r0.7/api_docs/python/framework.html#graph-collections

    Args:
      window: tf.Tensor of shape (-1, window_size*embed_size)
    Returns:
      output: tf.Tensor of shape (batch_size, num_domains)
    """
    ### YOUR CODE HERE
    with tf.variable_scope('Layer') as hidden:
        W = tf.get_variable('W', (self.config.embed_size, self.config.hidden_size),
                            initializer=xavier_weight_init())
        b1 = tf.get_variable('b1', (self.config.hidden_size,),
                             initializer=xavier_weight_init())
        h = tf.tanh(tf.matmul(window, W) + b1)

    with tf.variable_scope('Score') as score_scope:
        U = tf.get_variable('U', (self.config.hidden_size, self.config.num_domains),
                            initializer=xavier_weight_init())
        h = tf.nn.dropout(h, self.dropout_placeholder)
        output = tf.matmul(h, U)

    regularization = self.config.l2 * 0.5 * (tf.reduce_sum(tf.square(W)) +
                                             tf.reduce_sum(tf.square(U)))
    tf.add_to_collection('REGULARIZATION_LOSSES', regularization)
    ### END YOUR CODE
    return output
def add_embedding(self):
    """Add embedding layer.

    Hint: This layer should use the input_placeholder to index into the embedding.
    Hint: You might find tf.nn.embedding_lookup useful.
    Hint: You might find tf.split, tf.squeeze useful in constructing tensor inputs.
    Hint: Check the last slide from the TensorFlow lecture.
    Hint: Here are the dimensions of the variables you will need to create:
          L: (len(self.vocab), embed_size)

    Returns:
      inputs: List of length num_steps, each of whose elements should be a tensor of
              shape (batch_size, embed_size).
    """
    # The embedding lookup is currently only implemented for the CPU
    with tf.device('/cpu:0'):
        with tf.variable_scope('Embedding_Layer') as scope:
            embeddings = tf.get_variable(
                'embedding',
                shape=[len(self.vocab), self.config.embed_size],
                initializer=xavier_weight_init(),
                trainable=True)
            # Lookup returns shape (?, 10, 50): 10 is num_steps and 50 is embed_size.
            window = tf.nn.embedding_lookup(params=embeddings, ids=self.input_placeholder)
            # Split along the step axis into num_steps (= 10) chunks.
            window = tf.split(window, self.config.num_steps, axis=1)
            # print window
            for i in range(len(window)):
                # Printing the shape at each step helps verify that the dimensions match.
                window[i] = tf.squeeze(window[i], [1])
            inputs = window
            return inputs
def add_prediction_op(self):
    """Adds the 1-hidden-layer NN:
        h = Relu(xW + b1)
        h_drop = Dropout(h, dropout_rate)
        pred = h_dropU + b2

    Note that we are not applying a softmax to pred. The softmax will instead be done in
    the add_loss_op function, which improves efficiency because we can use
    tf.nn.softmax_cross_entropy_with_logits

    Use the initializer from q2_initialization.py to initialize W and U (you can initialize
    b1 and b2 with zeros)

    Hint: Here are the dimensions of the various variables you will need to create
          W:  (n_features*embed_size, hidden_size)
          b1: (hidden_size,)
          U:  (hidden_size, n_classes)
          b2: (n_classes)
    Hint: Note that tf.nn.dropout takes the keep probability (1 - p_drop) as an argument.
          The keep probability should be set to the value of self.dropout_placeholder

    Returns:
      pred: tf.Tensor of shape (batch_size, n_classes)
    """
    x = self.add_embedding()
    xavier = xavier_weight_init()
    with tf.variable_scope("transformation"):
        self.W = W = xavier(
            [self.config.n_features * self.config.embed_size, self.config.hidden_size])
        b1 = tf.Variable(tf.random_uniform([self.config.hidden_size]))
        z1 = tf.matmul(x, W) + b1
        h = tf.nn.relu(z1)
        h_drop = tf.nn.dropout(h, self.dropout_placeholder)
        U = xavier([self.config.hidden_size, self.config.n_classes])
        b2 = tf.Variable(tf.random_uniform([self.config.n_classes]))
        pred = tf.matmul(h_drop, U) + b2
    return pred
def add_prediction_op(self):
    """Adds the 1-hidden-layer NN:
        h = Relu(xW + b1)
        h_drop = Dropout(h, dropout_rate)
        pred = h_dropU + b2

    Note that we are not applying a softmax to pred. The softmax will instead be done in
    the add_loss_op function, which improves efficiency because we can use
    tf.nn.softmax_cross_entropy_with_logits

    Use the initializer from q2_initialization.py to initialize W and U (you can initialize
    b1 and b2 with zeros)

    Hint: Note that tf.nn.dropout takes the keep probability (1 - p_drop) as an argument.
          Therefore the keep probability should be set to the value of
          (1 - self.dropout_placeholder)

    Returns:
      pred: tf.Tensor of shape (batch_size, n_classes)
    """
    x = self.add_embedding()
    ### YOUR CODE HERE
    xavier_initializer = xavier_weight_init()
    W = xavier_initializer((self.config.n_features * self.config.embed_size,
                            self.config.hidden_size))
    U = xavier_initializer((self.config.hidden_size, self.config.n_classes))
    b1 = tf.Variable(tf.zeros((self.config.hidden_size,)))
    b2 = tf.Variable(tf.zeros((self.config.n_classes,)))

    if self.config.n_hidden_layers > 1:
        Ws = []
        bs = []
        for _ in range(self.config.n_hidden_layers - 1):
            Ws.append(xavier_initializer((self.config.hidden_size, self.config.hidden_size)))
            bs.append(tf.Variable(tf.zeros((self.config.hidden_size,))))

    h = tf.nn.relu(tf.matmul(x, W) + b1)
    h_drop = tf.nn.dropout(h, keep_prob=1 - self.dropout_placeholder)

    if self.config.n_hidden_layers > 1:
        for i in range(self.config.n_hidden_layers - 1):
            h_drop = tf.nn.dropout(tf.nn.relu(tf.matmul(h_drop, Ws[i]) + bs[i]),
                                   keep_prob=1 - self.dropout_placeholder)

    pred = tf.matmul(h_drop, U) + b2
    ### END YOUR CODE
    return pred
def add_projection(self, rnn_outputs):
    """Adds a projection layer.

    The projection layer transforms the hidden representation to a distribution over the
    vocabulary.

    Hint: Here are the dimensions of the variables you will need to create
          U:   (hidden_size, len(vocab))
          b_2: (len(vocab),)

    Args:
      rnn_outputs: List of length num_steps, each of whose elements should be a tensor of
                   shape (batch_size, hidden_size).
    Returns:
      outputs: List of length num_steps, each a tensor of shape (batch_size, len(vocab))
    """
    ### YOUR CODE HERE
    # raise NotImplementedError
    with tf.variable_scope('projection'):
        U = tf.get_variable(
            "U", shape=[self.config.hidden_size, len(self.vocab)],
            initializer=xavier_weight_init())
        b_2 = tf.get_variable("b_2", shape=[len(self.vocab)],
                              initializer=tf.constant_initializer(0.0))
        outputs = []
        for i in range(len(rnn_outputs)):
            temp = rnn_outputs[i]
            y_hat = tf.matmul(temp, U) + b_2
            outputs.append(y_hat)
    ### END YOUR CODE
    return outputs
def add_prediction_op(self):
    """Adds the 1-hidden-layer NN:
        h = Relu(xW + b1)
        h_drop = Dropout(h, dropout_rate)
        pred = h_dropU + b2

    Note that we are not applying a softmax to pred. The softmax will instead be done in
    the add_loss_op function, which improves efficiency because we can use
    tf.nn.softmax_cross_entropy_with_logits

    Use the initializer from q2_initialization.py to initialize W and U (you can initialize
    b1 and b2 with zeros)

    Hint: Note that tf.nn.dropout takes the keep probability (1 - p_drop) as an argument.
          Therefore the keep probability should be set to the value of
          (1 - self.dropout_placeholder)

    Returns:
      pred: tf.Tensor of shape (batch_size, n_classes)
    """
    x = self.add_embedding()
    ### YOUR CODE HERE
    xavier_initializer = xavier_weight_init()
    shape_w = (self.config.n_features * self.config.embed_size, self.config.hidden_size)
    shape_u = (self.config.hidden_size, self.config.n_classes)
    W = xavier_initializer(shape_w)
    U = xavier_initializer(shape_u)
    b1 = tf.Variable(tf.random_uniform([self.config.hidden_size, ]))
    b2 = tf.Variable(tf.random_uniform([self.config.n_classes]))

    mult = tf.matmul(x, W) + b1
    h = tf.nn.relu(mult)
    h_drop = tf.nn.dropout(h, keep_prob=(1 - self.dropout_placeholder))
    pred = tf.matmul(h_drop, U) + b2
    ### END YOUR CODE
    return pred
def add_embedding(self):
    """Add embedding layer that maps from vocabulary to vectors.

    Creates an embedding tensor of shape (len(self.wv), embed_size). Use the
    input_placeholder to retrieve the embeddings for words in the current batch.

    (Words are discrete entities. They need to be transformed into vectors for use in
    deep-learning. Although we won't do so in this problem, in practice it's useful to
    initialize the embedding with pre-trained word-vectors. For this problem, using the
    default initializer is sufficient.)

    Hint: This layer should use the input_placeholder to index into the embedding.
    Hint: You might find tf.nn.embedding_lookup useful.
    Hint: See following link to understand what -1 in a shape means.
          https://www.tensorflow.org/versions/r0.8/api_docs/python/array_ops.html#reshape
    Hint: Check the last slide from the TensorFlow lecture.
    Hint: Here are the dimensions of the variables you will need to create:
          L: (len(self.wv), embed_size)

    Returns:
      window: tf.Tensor of shape (-1, window_size*embed_size)
    """
    # The embedding lookup is currently only implemented for the CPU
    with tf.device('/cpu:0'):
        ### YOUR CODE HERE
        with tf.variable_scope("embedding_layer") as scope:
            embedding = tf.get_variable(
                "embedding", [len(self.wv), self.config.embed_size],
                initializer=xavier_weight_init())
            window = tf.nn.embedding_lookup(params=embedding, ids=self.input_placeholder)
            window = tf.reshape(
                window,
                shape=[-1, self.config.window_size * self.config.embed_size],
                name="window")
            variable_summaries(window, window.name)
        ### END YOUR CODE
        return window
def add_conversational_context(self, sentence_summary_size):
    # No context is taken into account; classification is based purely on sentence content.
    with tf.variable_scope("hidden_layer"):
        W = tf.get_variable("weights", (sentence_summary_size, sentence_summary_size),
                            initializer=xavier_weight_init())
        b = tf.get_variable("biases", (sentence_summary_size,))
    self.conversation_state = tf.tanh(
        tf.nn.dropout(tf.matmul(self.sentence_summaries, W) + b,
                      self.dropout_placeholder))
    return sentence_summary_size
def get_xavier_variable(name, shape):
    return tf.get_variable(name, shape, initializer=xavier_weight_init())
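# For reference, a minimal sketch of the xavier_weight_init helper that the snippets above
# assume (typically defined in q2_initialization.py). This is an illustrative assumption,
# not the canonical implementation: it returns an initializer function that draws from
# Uniform(-eps, eps) with eps = sqrt(6 / (fan_in + fan_out)), i.e. Xavier/Glorot-uniform
# initialization for a weight matrix of the given shape.
import numpy as np
import tensorflow as tf


def xavier_weight_init():
    """Returns a function usable either directly (xavier(shape)) or as the
    `initializer=` argument of tf.get_variable."""
    def _xavier_initializer(shape, **kwargs):
        # For a 2-D shape, the sum of the dimensions is fan_in + fan_out.
        epsilon = np.sqrt(6.0 / np.sum(shape))
        return tf.random_uniform(shape, minval=-epsilon, maxval=epsilon)
    return _xavier_initializer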