def run(self, x, y=None): """ Runs the model for a batch of examples. The correct labels are known during training, but not at test time. When correct labels are available, `y` is a (batch_size x 10) numpy array. Each row in the array is a one-hot vector encoding the correct class. Inputs: x: a (batch_size x 784) numpy array y: a (batch_size x 10) numpy array, or None Output: (if y is not None) A nn.Graph instance, where the last added node is the loss (if y is None) A (batch_size x 10) numpy array of scores (aka logits) """ graph = nn.Graph([self.m, self.b, self.m2, self.b2]) input_x = nn.Input(graph, x) #============= LAYER 01 ===============# xm = nn.MatrixMultiply(graph, input_x, self.m) xm_plus_b = nn.MatrixVectorAdd(graph, xm, self.b) #============= LAYER 02 ===============# relu = nn.ReLU(graph, xm_plus_b) xm2 = nn.MatrixMultiply(graph, relu, self.m2) xm_plus_b2 = nn.MatrixVectorAdd(graph, xm2, self.b2) if y is not None: input_y = nn.Input(graph, y) loss = nn.SoftmaxLoss(graph, xm_plus_b2, input_y) return graph else: return graph.get_output(xm_plus_b2)
def run(self, x, y=None): """ Runs the model for a batch of examples. The correct labels are known during training, but not at test time. When correct labels are available, `y` is a (batch_size x 10) numpy array. Each row in the array is a one-hot vector encoding the correct class. Your model should predict a (batch_size x 10) numpy array of scores, where higher scores correspond to greater probability of the image belonging to a particular class. You should use `nn.SoftmaxLoss` as your training loss. Inputs: x: a (batch_size x 784) numpy array y: a (batch_size x 10) numpy array, or None Output: (if y is not None) A nn.Graph instance, where the last added node is the loss (if y is None) A (batch_size x 10) numpy array of scores (aka logits) """ "*** YOUR CODE HERE ***" self.graph1 = nn.Graph([self.w1,self.b1,self.w2,self.b2,self.w3,self.b3]) self.input_x = nn.Input(self.graph1, x) if y is not None: "*** YOUR CODE HERE ***" self.input_y = nn.Input(self.graph1, y) xw_1 = nn.MatrixMultiply(self.graph1, self.input_x, self.w1) xw1_plus_b1 = nn.MatrixVectorAdd(self.graph1, xw_1, self.b1) relu_l1 = nn.ReLU(self.graph1, xw1_plus_b1) l1w2 = nn.MatrixMultiply(self.graph1, relu_l1, self.w2) l1w2_plus_b2 = nn.MatrixVectorAdd(self.graph1, l1w2, self.b2) relu_l2 = nn.ReLU(self.graph1, l1w2_plus_b2) l2w3 = nn.MatrixMultiply(self.graph1, relu_l2, self.w3) l2w3_plus_b3 = nn.MatrixVectorAdd(self.graph1, l2w3, self.b3) loss = nn.SoftmaxLoss(self.graph1, l2w3_plus_b3, self.input_y) # print('loss shape',l2w3_plus_b3) return self.graph1 else: "*** YOUR CODE HERE ***" graph2 = nn.Graph([self.w1,self.b1,self.w2,self.b2,self.w3,self.b3]) input_x = nn.Input(graph2, x) xw_1 = nn.MatrixMultiply(graph2, input_x, self.w1) xw1_plus_b1 = nn.MatrixVectorAdd(graph2, xw_1, self.b1) relu_l1 = nn.ReLU(graph2, xw1_plus_b1) l1w2 = nn.MatrixMultiply(graph2, relu_l1, self.w2) l1w2_plus_b2 = nn.MatrixVectorAdd(graph2, l1w2, self.b2) relu_l2 = nn.ReLU(graph2, l1w2_plus_b2) l2w3 = nn.MatrixMultiply(graph2, relu_l2, self.w3) l2w3_plus_b3 = nn.MatrixVectorAdd(graph2, l2w3, self.b3) return graph2.get_output(l2w3_plus_b3)
def run(self, xs, y=None): """ Runs the model for a batch of examples. Although words have different lengths, our data processing guarantees that within a single batch, all words will be of the same length (L). Here `xs` will be a list of length L. Each element of `xs` will be a (batch_size x self.num_chars) numpy array, where every row in the array is a one-hot vector encoding of a character. For example, if we have a batch of 8 three-letter words where the last word is "cat", we will have xs[1][7,0] == 1. Here the index 0 reflects the fact that the letter "a" is the inital (0th) letter of our combined alphabet for this task. The correct labels are known during training, but not at test time. When correct labels are available, `y` is a (batch_size x 5) numpy array. Each row in the array is a one-hot vector encoding the correct class. Your model should use a Recurrent Neural Network to summarize the list `xs` into a single node that represents a (batch_size x hidden_size) array, for your choice of hidden_size. It should then calculate a (batch_size x 5) numpy array of scores, where higher scores correspond to greater probability of the word originating from a particular language. You should use `nn.SoftmaxLoss` as your training loss. Inputs: xs: a list with L elements (one per character), where each element is a (batch_size x self.num_chars) numpy array y: a (batch_size x 5) numpy array, or None Output: (if y is not None) A nn.Graph instance, where the last added node is the loss (if y is None) A (batch_size x 5) numpy array of scores (aka logits) Hint: you may use the batch_size variable in your code """ batch_size = xs[0].shape[0] "*** YOUR CODE HERE ***" h = nn.Variable(batch_size, self.dimensionality) h.data = np.zeros((batch_size, self.dimensionality)) g = nn.Graph([h, self.w1, self.w2, self.w3, self.b]) for x in xs: h1 = nn.MatrixMultiply(g, h, self.w1) x2 = nn.MatrixMultiply(g, nn.Input(g, x), self.w2) h1_add_x2 = nn.Add(g, h1, x2) add_b = nn.MatrixVectorAdd(g, h1_add_x2, self.b) relu = nn.ReLU(g, add_b) h = relu result = nn.MatrixMultiply(g, h, self.w3) if y is not None: "*** YOUR CODE HERE ***" nn.SoftmaxLoss(g, result, nn.Input(g, y)) return g else: "*** YOUR CODE HERE ***" return g.get_output(result)
def run(self, x, y=None): """ Runs the model for a batch of examples. The correct labels are known during training, but not at test time. When correct labels are available, `y` is a (batch_size x 10) numpy array. Each row in the array is a one-hot vector encoding the correct class. Your model should predict a (batch_size x 10) numpy array of scores, where higher scores correspond to greater probability of the image belonging to a particular class. You should use `nn.SoftmaxLoss` as your training loss. Inputs: x: a (batch_size x 784) numpy array y: a (batch_size x 10) numpy array, or None Output: (if y is not None) A nn.Graph instance, where the last added node is the loss (if y is None) A (batch_size x 10) numpy array of scores (aka logits) """ "*** YOUR CODE HERE ***" if len(x) == 1: return 0 if not self.graph: w1 = nn.Variable(784, 500) w2 = nn.Variable(500, 500) w3 = nn.Variable(500, 10) b1 = nn.Variable(1, 500) b2 = nn.Variable(1, 500) b3 = nn.Variable(1, 10) self.l = [w1, w2, w3, b1, b2, b3] self.graph = nn.Graph(self.l) self.graph = nn.Graph(self.l) input_x = nn.Input(self.graph, x) #Tx784 if y is not None: #<--- THIS LITTLE CONDITIONAL SO IMPORTANT HFS input_y = nn.Input(self.graph, y) mult = nn.MatrixMultiply(self.graph, input_x, self.l[0]) #Tx50 add = nn.MatrixVectorAdd(self.graph, mult, self.l[3]) relu = nn.ReLU(self.graph, add) mult2 = nn.MatrixMultiply(self.graph, relu, self.l[1]) #Tx50 add2 = nn.MatrixVectorAdd(self.graph, mult2, self.l[4]) #Tx50 relu2 = nn.ReLU(self.graph, add2) mult3 = nn.MatrixMultiply(self.graph, relu2, self.l[2]) add3 = nn.MatrixVectorAdd(self.graph, mult3, self.l[5]) if y is not None: # At training time, the correct output `y` is known. # Here, you should construct a loss node, and return the nn.Graph # that the node belongs to. The loss node must be the last node # added to the graph. loss = nn.SoftmaxLoss(self.graph, add3, input_y) return self.graph else: # At test time, the correct output is unknown. # You should instead return your model's prediction as a numpy array #print(self.graph.get_output(self.graph.get_nodes()[-1])) return self.graph.get_output(self.graph.get_nodes()[-1])
def run(self, x, y=None): """ TODO: Question 6 - [Application] Digit Classification Runs the model for a batch of examples. The correct labels are known during training, but not at test time. When correct labels are available, `y` is a (batch_size x 10) numpy array. Each row in the array is a one-hot vector encoding the correct class. Your model should predict a (batch_size x 10) numpy array of scores, where higher scores correspond to greater probability of the image belonging to a particular class. You should use `nn.SoftmaxLoss` as your training loss. Inputs: x: a (batch_size x 784) numpy array y: a (batch_size x 10) numpy array, or None Output: (if y is not None) A nn.Graph instance, where the last added node is the loss (if y is None) A (batch_size x 10) numpy array of scores (aka logits) """ "*** YOUR CODE HERE ***" #to implement f(x) = relu(x.w1+b1).w2 + b2 graph = nn.Graph([self.w1, self.b1, self.w2, self.b2]) input_x = nn.Input(graph, x) #input_y = Input(graph, y) #a = x.w1 a = nn.MatrixMultiply(graph, input_x, self.w1) #relu(a+b1).w2 + b2 #b = a + b1 b = nn.MatrixVectorAdd(graph, a, self.b1) #relu(b).w2 + b2 two_layer_relu = nn.ReLU(graph, b) #c = relu(b).w2 c = nn.MatrixMultiply(graph, two_layer_relu, self.w2) #d = c + b2 d = nn.MatrixVectorAdd(graph, c, self.b2) #loss = SquareLoss(graph, xm_plus_b, input_y) if y is not None: "*** YOUR CODE HERE ***" # At training time, the correct output `y` is known. # Here, you should construct a loss node, and return the nn.Graph # that the node belongs to. The loss node must be the last node # added to the graph. "*** YOUR CODE HERE ***" input_y = nn.Input(graph, y) loss = nn.SoftmaxLoss(graph, d, input_y) return graph else: # At test time, the correct output is unknown. # You should instead return your model's prediction as a numpy array "*** YOUR CODE HERE ***" return graph.get_output(d)
def run(self, x, y=None): """ Runs the model for a batch of examples. The correct labels are known during training, but not at test time. When correct labels are available, y is a (batch_size x 10) numpy array. Each row in the array is a one-hot vector encoding the correct class. Your model should predict a (batch_size x 10) numpy array of scores, where higher scores correspond to greater probability of the image belonging to a particular class. You should use nn.SoftmaxLoss as your training loss. Inputs: x: a (batch_size x 784) numpy array y: a (batch_size x 10) numpy array, or None Output: (if y is not None) A nn.Graph instance, where the last added node is the loss (if y is None) A (batch_size x 10) numpy array of scores (aka logits) """ "* YOUR CODE HERE *" graph = nn.Graph([ self.weight1, self.bias1, self.weight2, self.bias2, self.weight3, self.bias3, self.weight4, self.bias4, self.weight5, self.bias5 ]) input_x = nn.Input(graph, x) xw1 = nn.MatrixMultiply(graph, input_x, self.weight1) plus1b1 = nn.MatrixVectorAdd(graph, xw1, self.bias1) relu1 = nn.ReLU(graph, plus1b1) relu1_2 = nn.MatrixMultiply(graph, relu1, self.weight2) plus2b2 = nn.MatrixVectorAdd(graph, relu1_2, self.bias2) relu2 = nn.ReLU(graph, plus2b2) relu2_3 = nn.MatrixMultiply(graph, relu2, self.weight3) plus3b3 = nn.MatrixVectorAdd(graph, relu2_3, self.bias3) relu3 = nn.ReLU(graph, plus3b3) relu3_4 = nn.MatrixMultiply(graph, relu3, self.weight4) plus4b4 = nn.MatrixVectorAdd(graph, relu3_4, self.bias4) relu4 = nn.ReLU(graph, plus4b4) relu4_5 = nn.MatrixMultiply(graph, relu4, self.weight5) plus5b5 = nn.MatrixVectorAdd(graph, relu4_5, self.bias5) if y is not None: "* YOUR CODE HERE *" input_y = nn.Input(graph, y) loss = nn.SoftmaxLoss(graph, plus5b5, input_y) return graph else: "* YOUR CODE HERE *" return graph.get_output(plus5b5)
def run(self, x, y=None): """ Runs the model for a batch of examples. The correct labels are known during training, but not at test time. When correct labels are available, `y` is a (batch_size x 10) numpy array. Each row in the array is a one-hot vector encoding the correct class. Your model should predict a (batch_size x 10) numpy array of scores, where higher scores correspond to greater probability of the image belonging to a particular class. You should use `nn.SoftmaxLoss` as your training loss. Inputs: x: a (batch_size x 784) numpy array y: a (batch_size x 10) numpy array, or None Output: (if y is not None) A nn.Graph instance, where the last added node is the loss (if y is None) A (batch_size x 10) numpy array of scores (aka logits) """ "*** YOUR CODE HERE ***" #print("x", x.shape) #print("y", y.shape) graph = nn.Graph( [self.W1, self.W2, self.W3, self.W4, self.W5, self.W6]) input_x = nn.Input(graph, x) #first term xW1mult = nn.MatrixMultiply(graph, input_x, self.W1) #second term xW2mult = nn.MatrixMultiply(graph, input_x, self.W2) addW1W2 = nn.Add(graph, xW1mult, xW2mult) relu1 = nn.ReLU(graph, addW1W2) reluMult = nn.MatrixMultiply(graph, relu1, self.W3) xW4mult = nn.MatrixMultiply(graph, input_x, self.W4) W4W5mult = nn.MatrixMultiply(graph, xW4mult, self.W5) per2Add = nn.Add(graph, reluMult, W4W5mult) totalMult = nn.MatrixMultiply(graph, per2Add, self.W6) #another term #lastRelu = nn.ReLU(graph, totalMult) if y is not None: "*** YOUR CODE HERE ***" input_y = nn.Input(graph, y) loss_node = nn.SoftmaxLoss(graph, totalMult, input_y) return graph else: "*** YOUR CODE HERE ***" return graph.get_output(totalMult)
def run(self, x, y=None): """ Runs the model for a batch of examples. The correct labels are known during training, but not at test time. When correct labels are available, `y` is a (batch_size x 10) numpy array. Each row in the array is a one-hot vector encoding the correct class. Your model should predict a (batch_size x 10) numpy array of scores, where higher scores correspond to greater probability of the image belonging to a particular class. You should use `nn.SoftmaxLoss` as your training loss. Inputs: x: a (batch_size x 784) numpy array y: a (batch_size x 10) numpy array, or None Output: (if y is not None) A nn.Graph instance, where the last added node is the loss (if y is None) A (batch_size x 10) numpy array of scores (aka logits) """ "*** YOUR CODE HERE ***" graph = nn.Graph([ self.m0, self.b0, self.m1, self.b1, self.m2, self.b2, self.m3, self.b3, self.m4, self.b4, self.m5, self.b5 ]) input_x = nn.Input(graph, x) t = nn.MatrixMultiply(graph, input_x, self.m0) t = nn.MatrixVectorAdd(graph, t, self.b0) t = nn.ReLU(graph, t) t = nn.MatrixMultiply(graph, t, self.m1) t = nn.MatrixVectorAdd(graph, t, self.b1) t = nn.ReLU(graph, t) t = nn.MatrixMultiply(graph, t, self.m2) t = nn.MatrixVectorAdd(graph, t, self.b2) t = nn.ReLU(graph, t) t = nn.MatrixMultiply(graph, t, self.m3) t = nn.MatrixVectorAdd(graph, t, self.b3) t = nn.ReLU(graph, t) t = nn.MatrixMultiply(graph, t, self.m4) t = nn.MatrixVectorAdd(graph, t, self.b4) t = nn.ReLU(graph, t) t = nn.MatrixMultiply(graph, t, self.m5) t = nn.MatrixVectorAdd(graph, t, self.b5) if y is not None: "*** YOUR CODE HERE ***" input_y = nn.Input(graph, y) loss = nn.SoftmaxLoss(graph, t, input_y) return graph else: "*** YOUR CODE HERE ***" res = graph.outputs[graph.get_nodes()[-1]] return res
def run(self, x, y=None): """ Runs the model for a batch of examples. The correct labels are known during training, but not at test time. When correct labels are available, `y` is a (batch_size x 10) numpy array. Each row in the array is a one-hot vector encoding the correct class. Your model should predict a (batch_size x 10) numpy array of scores, where higher scores correspond to greater probability of the image belonging to a particular class. You should use `nn.SoftmaxLoss` as your training loss. Inputs: x: a (batch_size x 784) numpy array y: a (batch_size x 10) numpy array, or None Output: (if y is not None) A nn.Graph instance, where the last added node is the loss (if y is None) A (batch_size x 10) numpy array of scores (aka logits) """ "*** YOUR CODE HERE ***" graph = nn.Graph(self.w1_list + self.b1_list + self.w2_list + self.b2_list + self.w3_list + self.b3_list) digit_losses = [] for digit in range(0,10): xInput = nn.Input(graph,x) layer1 = nn.MatrixMultiply(graph, xInput, self.w1_list[digit]) layer2 = nn.MatrixVectorAdd(graph, layer1, self.b1_list[digit]) layer3 = nn.ReLU(graph, layer2) layer4 = nn.MatrixMultiply(graph, layer3, self.w2_list[digit]) layer5 = nn.MatrixVectorAdd(graph, layer4, self.b2_list[digit]) layer6 = nn.ReLU(graph, layer5) layer7 = nn.MatrixMultiply(graph, layer6, self.w3_list[digit]) layer8 = nn.MatrixVectorAdd(graph, layer7, self.b3_list[digit]) basis_vector = np.zeros((1, 10)) basis_vector[0][digit] = 1 basis_vector_input = nn.Input(graph, basis_vector) digit_losses.append(nn.MatrixMultiply(graph, layer8, basis_vector_input)) if digit == 1: digit_losses_matrix = nn.Add(graph, digit_losses[0], digit_losses[1]) if digit > 1: previous = digit_losses_matrix digit_losses_matrix = nn.Add(graph, digit_losses_matrix, digit_losses[digit]) if y is not None: "*** YOUR CODE HERE ***" yInput = nn.Input(graph, y) soft_max_layer = nn.SoftmaxLoss(graph, digit_losses_matrix, yInput) return graph else: "*** YOUR CODE HERE ***" return graph.get_output(digit_losses_matrix)
def run(self, x, y=None): """ TODO: Question 6 - [Application] Digit Classification Runs the model for a batch of examples. The correct labels are known during training, but not at test time. When correct labels are available, `y` is a (batch_size x 10) numpy array. Each row in the array is a one-hot vector encoding the correct class. Your model should predict a (batch_size x 10) numpy array of scores, where higher scores correspond to greater probability of the image belonging to a particular class. You should use `nn.SoftmaxLoss` as your training loss. Inputs: x: a (batch_size x 784) numpy array y: a (batch_size x 10) numpy array, or None Output: (if y is not None) A nn.Graph instance, where the last added node is the loss (if y is None) A (batch_size x 10) numpy array of scores (aka logits) """ "*** YOUR CODE HERE ***" hidden_layer_size = 200 if not self.w1: self.w1 = nn.Variable(x.shape[1], hidden_layer_size) if not self.w2: self.w2 = nn.Variable(hidden_layer_size, y.shape[1]) if not self.b1: self.b1 = nn.Variable(hidden_layer_size) if not self.b2: self.b2 = nn.Variable(y.shape[1]) g = nn.Graph([self.w1, self.w2, self.b1, self.b2]) result = nn.MatrixVectorAdd( g, nn.MatrixMultiply( g, nn.ReLU( g, nn.MatrixVectorAdd( g, nn.MatrixMultiply(g, nn.Input(g, x), self.w1), self.b1)), self.w2), self.b2) if y is not None: "*** YOUR CODE HERE ***" g.add(nn.SoftmaxLoss(g, result, nn.Input(g, y))) return g else: "*** YOUR CODE HERE ***" return g.get_output(result)
def get_loss(self, x, y):
    """
    Computes the loss for a batch of examples.

    The correct labels `y` are represented as a node with shape
    (batch_size x 10). Each row is a one-hot vector encoding the correct
    digit class (0-9).

    Inputs:
        x: a node with shape (batch_size x 784)
        y: a node with shape (batch_size x 10)
    Returns: a loss node
    """
    "*** YOUR CODE HERE ***"
    return nn.SoftmaxLoss(self.run(x), y)
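# This get_loss uses the newer nn API, in which run() returns a node rather
# than a graph. A minimal sketch of how the returned loss node is typically
# consumed in one training step, assuming that API (nn.gradients plus
# Parameter.update, as used in the train method further below); the
# parameter list here is hypothetical and depends on the model's __init__:
def train_step(self, x, y, learning_rate):
    # x and y are already nodes here (e.g. as yielded by dataset.iterate_once)
    loss = self.get_loss(x, y)
    params = [self.W1, self.b1, self.W2, self.b2]  # hypothetical names
    grads = nn.gradients(loss, params)
    for param, grad in zip(params, grads):
        # update(direction, multiplier) performs data += multiplier * direction
        param.update(grad, -learning_rate)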
def run(self, x, y=None): """ Runs the model for a batch of examples. The correct labels are known during training, but not at test time. When correct labels are available, `y` is a (batch_size x 10) numpy array. Each row in the array is a one-hot vector encoding the correct class. Your model should predict a (batch_size x 10) numpy array of scores, where higher scores correspond to greater probability of the image belonging to a particular class. You should use `nn.SoftmaxLoss` as your training loss. Inputs: x: a (batch_size x 784) numpy array y: a (batch_size x 10) numpy array, or None Output: (if y is not None) A nn.Graph instance, where the last added node is the loss (if y is None) A (batch_size x 10) numpy array of scores (aka logits) """ "*** YOUR CODE HERE ***" size = x.shape[1] if not self.w1: self.w1 = nn.Variable(size, 100) if not self.w2: self.w2 = nn.Variable(100, 10) if not self.b1: self.b1 = nn.Variable(100) if not self.b2: self.b2 = nn.Variable(10) graph = nn.Graph([self.w1, self.w2, self.b1, self.b2]) input_x = nn.Input(graph, x) xw1 = nn.MatrixMultiply(graph, input_x, self.w1) xw1_b1 = nn.MatrixVectorAdd(graph, xw1, self.b1) relu = nn.ReLU(graph, xw1_b1) xw2 = nn.MatrixMultiply(graph, relu, self.w2) xw2_b2 = nn.MatrixVectorAdd(graph, xw2, self.b2) if y is not None: "*** YOUR CODE HERE ***" input_y = nn.Input(graph, y) loss = nn.SoftmaxLoss(graph, xw2_b2, input_y) return graph else: return graph.get_output(graph.get_nodes()[-1])
def get_loss(self, xs, y):
    """
    Computes the loss for a batch of examples.

    The correct labels `y` are represented as a node with shape
    (batch_size x 5). Each row is a one-hot vector encoding the correct
    language.

    Inputs:
        xs: a list with L elements (one per character), where each element
            is a node with shape (batch_size x self.num_chars)
        y: a node with shape (batch_size x 5)
    Returns: a loss node
    """
    "*** YOUR CODE HERE ***"
    return nn.SoftmaxLoss(self.run(xs), y)
def run(self, x, y=None): """ Runs the model for a batch of examples. The correct labels are known during training, but not at test time. When correct labels are available, `y` is a (batch_size x 10) numpy array. Each row in the array is a one-hot vector encoding the correct class. Your model should predict a (batch_size x 10) numpy array of scores, where higher scores correspond to greater probability of the image belonging to a particular class. You should use `nn.SoftmaxLoss` as your training loss. Inputs: x: a (batch_size x 784) numpy array y: a (batch_size x 10) numpy array, or None Output: (if y is not None) A nn.Graph instance, where the last added node is the loss (if y is None) A (batch_size x 10) numpy array of scores (aka logits) """ "*** YOUR CODE HERE ***" graph = nn.Graph(self.param_w + self.param_b) inX = nn.Input(graph, x) last = inX for i in range(self.num_layers): multNode = nn.MatrixMultiply(graph, last, self.param_w[i]) addNode = nn.MatrixVectorAdd(graph, multNode, self.param_b[i]) if i != self.num_layers - 1: reluNode = nn.ReLU(graph, addNode) last = reluNode else: last = addNode if y is not None: # At training time, the correct output `y` is known. # Here, you should construct a loss node, and return the nn.Graph # that the node belongs to. The loss node must be the last node # added to the graph. inY = nn.Input(graph, y) loss = nn.SoftmaxLoss(graph, last, inY) return graph else: # At test time, the correct output is unknown. # You should instead return your model's prediction as a numpy array return graph.get_output(last)
def run(self, x, y=None): """ TODO: Question 6 - [Application] Digit Classification Runs the model for a batch of examples. The correct labels are known during training, but not at test time. When correct labels are available, `y` is a (batch_size x 10) numpy array. Each row in the array is a one-hot vector encoding the correct class. Your model should predict a (batch_size x 10) numpy array of scores, where higher scores correspond to greater probability of the image belonging to a particular class. You should use `nn.SoftmaxLoss` as your training loss. Inputs: x: a (batch_size x 784) numpy array y: a (batch_size x 10) numpy array, or None Output: (if y is not None) A nn.Graph instance, where the last added node is the loss (if y is None) A (batch_size x 10) numpy array of scores (aka logits) """ "*** YOUR CODE HERE ***" if len(x) == 1: return 0 if not self.graph: w1 = nn.Variable(784, 500) w2 = nn.Variable(500, 500) w3 = nn.Variable(500, 10) b1 = nn.Variable(1, 500) b2 = nn.Variable(1, 500) b3 = nn.Variable(1, 10) self.vars = [w1, w2, w3, b1, b2, b3] self.graph = nn.Graph(self.vars) input_x = nn.Input(self.graph, x) if y is not None: input_y = nn.Input(self.graph, y) add3 = add_three_edges(input_x, self.graph, self.vars) if y is not None: "*** YOUR CODE HERE ***" loss = nn.SoftmaxLoss(self.graph, add3, input_y) return self.graph else: "*** YOUR CODE HERE ***" return self.graph.get_output(self.graph.get_nodes()[-1])
def run(self, x, y=None): """ TODO: Question 6 - [Application] Digit Classification Runs the model for a batch of examples. The correct labels are known during training, but not at test time. When correct labels are available, `y` is a (batch_size x 10) numpy array. Each row in the array is a one-hot vector encoding the correct class. Your model should predict a (batch_size x 10) numpy array of scores, where higher scores correspond to greater probability of the image belonging to a particular class. You should use `nn.SoftmaxLoss` as your training loss. Inputs: x: a (batch_size x 784) numpy array y: a (batch_size x 10) numpy array, or None Output: (if y is not None) A nn.Graph instance, where the last added node is the loss (if y is None) A (batch_size x 10) numpy array of scores (aka logits) """ # "*** YOUR CODE HERE ***" graph = nn.Graph( [self.W1, self.b1, self.W2, self.b2, self.W3, self.b3]) input_x = nn.Input(graph, x) # layer 1 xm = nn.MatrixMultiply(graph, input_x, self.W1) xm_plus_b = nn.MatrixVectorAdd(graph, xm, self.b1) a1 = nn.ReLU(graph, xm_plus_b) # layer 2 a1m = nn.MatrixMultiply(graph, a1, self.W2) a1m_plus_b = nn.MatrixVectorAdd(graph, a1m, self.b2) a2 = nn.ReLU(graph, a1m_plus_b) # layer 3 a2m = nn.MatrixMultiply(graph, a2, self.W3) a2m_plus_b = nn.MatrixVectorAdd(graph, a2m, self.b3) if y is not None: # "*** YOUR CODE HERE ***" input_y = nn.Input(graph, y) loss = nn.SoftmaxLoss(graph, a2m_plus_b, input_y) return graph else: # "*** YOUR CODE HERE ***" return graph.get_output(a2m_plus_b)
def run(self, x, y=None): """ Runs the model for a batch of examples. The correct labels are known during training, but not at test time. When correct labels are available, `y` is a (batch_size x 10) numpy array. Each row in the array is a one-hot vector encoding the correct class. Your model should predict a (batch_size x 10) numpy array of scores, where higher scores correspond to greater probability of the image belonging to a particular class. You should use `nn.SoftmaxLoss` as your training loss. Inputs: x: a (batch_size x 784) numpy array y: a (batch_size x 10) numpy array, or None Output: (if y is not None) A nn.Graph instance, where the last added node is the loss (if y is None) A (batch_size x 10) numpy array of scores (aka logits) """ # Implemented based on the equation posted by Yichi Zhang on Piazza: # f(x) = W2 * ReLU(W1 * x + b1) + b2 graph = nn.Graph([self.W1, self.b1, self.W2, self.b2]) input_x = nn.Input(graph, x) W1_x = nn.MatrixMultiply(graph, input_x, self.W1) W1_x_plus_b1 = nn.MatrixVectorAdd(graph, W1_x, self.b1) relu = nn.ReLU(graph, W1_x_plus_b1) W2_relu = nn.MatrixMultiply(graph, relu, self.W2) W2_relu_plus_b2 = nn.MatrixVectorAdd(graph, W2_relu, self.b2) if y is not None: # At training time, the correct output `y` is known. # Here, you should construct a loss node, and return the nn.Graph # that the node belongs to. The loss node must be the last node # added to the graph. "*** YOUR CODE HERE ***" input_y = nn.Input(graph, y) W2_relu_plus_b2_loss = nn.SoftmaxLoss(graph, W2_relu_plus_b2, input_y) return graph else: # At test time, the correct output is unknown. # You should instead return your model's prediction as a numpy array "*** YOUR CODE HERE ***" return graph.get_output(W2_relu_plus_b2)
def run(self, x, y=None): """ Runs the model for a batch of examples. The correct labels are known during training, but not at test time. When correct labels are available, `y` is a (batch_size x 10) numpy array. Each row in the array is a one-hot vector encoding the correct class. Your model should predict a (batch_size x 10) numpy array of scores, where higher scores correspond to greater probability of the image belonging to a particular class. You should use `nn.SoftmaxLoss` as your training loss. Inputs: x: a (batch_size x 784) numpy array y: a (batch_size x 10) numpy array, or None Output: (if y is not None) A nn.Graph instance, where the last added node is the loss (if y is None) A (batch_size x 10) numpy array of scores (aka logits) """ "*** YOUR CODE HERE ***" graph = nn.Graph(self.variables) input_x = nn.Input(graph, x) xw1 = nn.MatrixMultiply(graph, input_x, self.variables[0]) sumxw1b1 = nn.MatrixVectorAdd(graph, xw1, self.variables[1]) relu = nn.ReLU(graph, sumxw1b1) reluW2 = nn.MatrixMultiply(graph, relu, self.variables[2]) finalSum = nn.MatrixVectorAdd(graph, reluW2, self.variables[3]) #relu2 = nn.ReLU(graph, sumRW2b2) #mul3 = nn.MatrixMultiply(graph, relu2, self.variables[4]) #finalSum = nn.MatrixVectorAdd(graph, mul3, self.variables[5]) if y is not None: "*** YOUR CODE HERE ***" input_y = nn.Input(graph, y) loss = nn.SoftmaxLoss(graph, finalSum, input_y) return graph else: "*** YOUR CODE HERE ***" nodes = graph.get_nodes() lastnode = nodes[-1] out = graph.get_output(lastnode) return out
def run(self, x, y=None): """ TODO: Question 6 - [Application] Digit Classification Runs the model for a batch of examples. The correct labels are known during training, but not at test time. When correct labels are available, `y` is a (batch_size x 10) numpy array. Each row in the array is a one-hot vector encoding the correct class. Your model should predict a (batch_size x 10) numpy array of scores, where higher scores correspond to greater probability of the image belonging to a particular class. You should use `nn.SoftmaxLoss` as your training loss. Inputs: x: a (batch_size x 784) numpy array y: a (batch_size x 10) numpy array, or None Output: (if y is not None) A nn.Graph instance, where the last added node is the loss (if y is None) A (batch_size x 10) numpy array of scores (aka logits) """ graph = nn.Graph( [self.W1, self.b1, self.W2, self.b2, self.W3, self.b3]) input_x = nn.Input(graph, x) W1x = nn.MatrixMultiply(graph, input_x, self.W1) W1b = nn.MatrixVectorAdd(graph, W1x, self.b1) W1Relu = nn.ReLU(graph, W1b) W2x = nn.MatrixMultiply(graph, W1Relu, self.W2) W2b = nn.MatrixVectorAdd(graph, W2x, self.b2) W2Relu = nn.ReLU(graph, W2b) W3x = nn.MatrixMultiply(graph, W2Relu, self.W3) W3b = nn.MatrixVectorAdd(graph, W3x, self.b3) yHat = W3b if y is not None: input_y = nn.Input(graph, y) Loss = nn.SoftmaxLoss(graph, yHat, input_y) return graph else: return graph.get_output(yHat)
def check_graph_accumulator(tracker):
    # A more thorough test that now requires gradient accumulators to be working
    import nn
    v1 = nn.Variable(1, 5)
    v1_data = np.ones_like(v1.data) / 10
    v1.data = v1_data
    graph = nn.Graph([v1])
    adder = nn.Add(graph, v1, v1)

    assert graph.get_nodes() == [v1, adder], \
        "Not all nodes are present after adding a node."
    assert graph.get_inputs(v1) == [], \
        "Graph.get_inputs should return no inputs for a Variable node"
    assert np.allclose(graph.get_output(v1), v1_data), \
        "Graph.get_output for a Variable should be its data:\n{}\n" \
        "Student returned:\n{}".format(v1_data, graph.get_output(v1))

    expected = [graph.get_output(v1)] * 2
    student = graph.get_inputs(adder)
    for a, b in zip(student, expected):
        assert np.allclose(a, b), \
            "Graph.get_inputs returned incorrect value for an Add node\n" \
            "Student returned:\n{}\nExpected:\n{}".format(a, b)
    assert np.allclose(graph.get_output(adder), 2 * graph.get_output(v1)), \
        "Graph.get_output returned incorrect value for an Add node\n" \
        "Student returned:\n{}\nExpected:\n{}".format(
            graph.get_output(adder), 2 * graph.get_output(v1))

    loss = nn.SoftmaxLoss(graph, adder, adder)
    for node in [v1, adder]:
        output_shape = graph.get_output(node).shape
        node_grad = graph.get_gradient(node)
        assert node_grad is not None, \
            "Graph.get_gradient returned None, instead of an all-zero value"
        assert np.shape(node_grad) == output_shape, \
            "Graph.get_gradient returned gradient of wrong shape, {0}; " \
            "expected, {1}".format(np.shape(node_grad), output_shape)
        assert np.allclose(node_grad, np.zeros_like(node_grad)), \
            "Graph.get_gradient should return all-zero values before " \
            "backprop is called, instead returned:\n{}".format(node_grad)

    expected_loss = 1.60943791243
    graph.backprop()
    v1_grad = graph.get_gradient(v1)
    assert np.allclose(v1_grad, np.ones_like(v1_grad) * expected_loss * 2), \
        "Incorrect gradient after running Graph.backprop().\n" \
        "Student returned:\n{}\nExpected:\n{}\nMake sure you are correctly " \
        "accumulating your gradients.".format(
            v1_grad, np.ones_like(v1_grad) * expected_loss * 2)

    tracker.add_points(3)
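# The property this test exercises, in isolation: v1 feeds both inputs of
# the Add node, so backprop must *sum* the gradient contributions from each
# use rather than overwrite them. A schematic sketch of such an accumulator
# loop -- not the project's actual nn.py; `inputs_of`, `outputs`, and the
# per-node `backward` method are hypothetical stand-ins for the graph's
# internal bookkeeping:
def backprop_sketch(nodes, inputs_of, outputs, loss_node):
    grads = {node: np.zeros_like(outputs[node]) for node in nodes}
    grads[loss_node] = np.ones_like(outputs[loss_node])
    for node in reversed(nodes):
        # node.backward returns one gradient per input, given the gradient
        # with respect to the node's output.
        for parent, g in zip(inputs_of[node], node.backward(grads[node])):
            grads[parent] = grads[parent] + g  # accumulate, never overwrite
    return grads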
def get_loss(self, x, y):
    """
    Computes the loss for a batch of examples.

    The correct labels `y` are represented as a node with shape
    (batch_size x 10). Each row is a one-hot vector encoding the correct
    digit class (0-9).

    For example:
        [0,0,0,0,0,1,0,0,0,0] would be the y corresponding to the digit 5
        [0,1,0,0,0,0,0,0,0,0] would be the y corresponding to the digit 1
    This is a multiclass problem, so a multiclass (softmax) loss is used.

    Inputs:
        x: a node with shape (batch_size x 784)
        y: a node with shape (batch_size x 10)
    Returns: a loss node
    """
    "*** YOUR CODE HERE ***"
    # You do not need to implement this; it is provided for you.
    # As you can see, it calls run() to obtain the scores for each batch.
    return nn.SoftmaxLoss(self.run(x), y)
def run(self, x, y=None): """ Runs the model for a batch of examples. The correct labels are known during training, but not at test time. When correct labels are available, `y` is a (batch_size x 10) numpy array. Each row in the array is a one-hot vector encoding the correct class. Your model should predict a (batch_size x 10) numpy array of scores, where higher scores correspond to greater probability of the image belonging to a particular class. You should use `nn.SoftmaxLoss` as your training loss. Inputs: x: a (batch_size x 784) numpy array y: a (batch_size x 10) numpy array, or None Output: (if y is not None) A nn.Graph instance, where the last added node is the loss (if y is None) A (batch_size x 10) numpy array of scores (aka logits) """ #batch_size = x.shape[0] #num_pixels = x.shape[1] # set up the graph dcGraph = nn.Graph([self.W1, self.b1, self.W2, self.b2]) input_x = nn.Input(dcGraph, x) xW1 = nn.MatrixMultiply(dcGraph, input_x, self.W1) xW1_plus_b1 = nn.MatrixVectorAdd(dcGraph, xW1, self.b1) ReLU_1 = nn.ReLU(dcGraph, xW1_plus_b1) R1W2 = nn.MatrixMultiply(dcGraph, ReLU_1, self.W2) R1W2_plus_b2 = nn.MatrixVectorAdd(dcGraph, R1W2, self.b2) if y is not None: input_y = nn.Input(dcGraph, y) R1W2_plus_b2_SML_y = nn.SoftmaxLoss(dcGraph, R1W2_plus_b2, input_y) return dcGraph else: return dcGraph.get_output(R1W2_plus_b2)
def run(self, x, y=None): """ Runs the model for a batch of examples. The correct labels are known during training, but not at test time. When correct labels are available, `y` is a (batch_size x 10) numpy array. Each row in the array is a one-hot vector encoding the correct class. Your model should predict a (batch_size x 10) numpy array of scores, where higher scores correspond to greater probability of the image belonging to a particular class. You should use `nn.SoftmaxLoss` as your training loss. Inputs: x: a (batch_size x 784) numpy array y: a (batch_size x 10) numpy array, or None Output: (if y is not None) A nn.Graph instance, where the last added node is the loss (if y is None) A (batch_size x 10) numpy array of scores (aka logits) """ graph = nn.Graph([self.m1, self.b1, self.m2, self.b2]) input_x = nn.Input(graph, x) if y is not None: input_y = nn.Input(graph, y) xm = nn.MatrixMultiply(graph, input_x, self.m1) xm_plus_b = nn.MatrixVectorAdd(graph, xm, self.b1) loss_1 = nn.ReLU(graph, xm_plus_b) loss_1m = nn.MatrixMultiply(graph, loss_1, self.m2) loss_1m_plus_b = nn.MatrixVectorAdd(graph, loss_1m, self.b2) nn.SoftmaxLoss(graph, loss_1m_plus_b, input_y) return graph else: xm = nn.MatrixMultiply(graph, input_x, self.m1) xm_plus_b = nn.MatrixVectorAdd(graph, xm, self.b1) loss_1 = nn.ReLU(graph, xm_plus_b) loss_1m = nn.MatrixMultiply(graph, loss_1, self.m2) loss_1m_plus_b = nn.MatrixVectorAdd(graph, loss_1m, self.b2) return graph.get_output(loss_1m_plus_b)
def run(self, x, y=None): """ TODO: Question 6 - [Application] Digit Classification Runs the model for a batch of examples. The correct labels are known during training, but not at test time. When correct labels are available, `y` is a (batch_size x 10) numpy array. Each row in the array is a one-hot vector encoding the correct class. Your model should predict a (batch_size x 10) numpy array of scores, where higher scores correspond to greater probability of the image belonging to a particular class. You should use `nn.SoftmaxLoss` as your training loss. Inputs: x: a (batch_size x 784) numpy array y: a (batch_size x 10) numpy array, or None Output: (if y is not None) A nn.Graph instance, where the last added node is the loss (if y is None) A (batch_size x 10) numpy array of scores (aka logits) """ self.graph = nn.Graph([self.w1, self.w2, self.b1, self.b2]) input_x = nn.Input(self.graph, x) xm1 = nn.MatrixMultiply(self.graph, input_x, self.w1) xm1_plus_b1 = nn.MatrixVectorAdd(self.graph, xm1, self.b1) relu = nn.ReLU(self.graph, xm1_plus_b1) reluw2 = nn.MatrixMultiply(self.graph, relu, self.w2) reluw2_plus_b2 = nn.MatrixVectorAdd(self.graph, reluw2, self.b2) if y is not None: input_y = nn.Input(self.graph, y) loss = nn.SoftmaxLoss(self.graph, reluw2_plus_b2, input_y) return self.graph else: return self.graph.get_output(self.graph.get_nodes()[-1])
def run(self, x, y=None): """ Runs the model for a batch of examples. The correct labels are known during training, but not at test time. When correct labels are available, `y` is a (batch_size x 10) numpy array. Each row in the array is a one-hot vector encoding the correct class. Your model should predict a (batch_size x 10) numpy array of scores, where higher scores correspond to greater probability of the image belonging to a particular class. You should use `nn.SoftmaxLoss` as your training loss. Inputs: x: a (batch_size x 784) numpy array y: a (batch_size x 10) numpy array, or None Output: (if y is not None) A nn.Graph instance, where the last added node is the loss (if y is None) A (batch_size x 10) numpy array of scores (aka logits) """ "*** YOUR CODE HERE ***" graph = nn.Graph([self.w1, self.b, self.w2, self.b1]) input_x = nn.Input(graph, x) xm_1 = nn.MatrixMultiply(graph, input_x, self.w1) add1 = nn.MatrixVectorAdd(graph, xm_1, self.b) hidden_output = nn.ReLU(graph, add1) mul2 = nn.MatrixMultiply(graph, hidden_output, self.w2) add2 = nn.MatrixVectorAdd(graph, mul2, self.b1) if y is not None: "*** YOUR CODE HERE ***" inputY = nn.Input(graph, y) loss = nn.SoftmaxLoss(graph, add2, inputY) return graph else: "*** YOUR CODE HERE ***" return graph.get_output(add2)
def run(self, x, y=None): """ TODO: Question 6 - [Application] Digit Classification Runs the model for a batch of examples. The correct labels are known during training, but not at test time. When correct labels are available, `y` is a (batch_size x 10) numpy array. Each row in the array is a one-hot vector encoding the correct class. Your model should predict a (batch_size x 10) numpy array of scores, where higher scores correspond to greater probability of the image belonging to a particular class. You should use `nn.SoftmaxLoss` as your training loss. Inputs: x: a (batch_size x 784) numpy array y: a (batch_size x 10) numpy array, or None Output: (if y is not None) A nn.Graph instance, where the last added node is the loss (if y is None) A (batch_size x 10) numpy array of scores (aka logits) """ "*** YOUR CODE HERE ***" if y is not None: "*** YOUR CODE HERE ***" graph = nn.Graph([self.w1, self.w2, self.b1, self.b2]) input_y = nn.Input(graph, y) graph, m = self.execute_layer(x, y, graph) loss = nn.SoftmaxLoss(graph, m, input_y) return graph else: "*** YOUR CODE HERE ***" graph = nn.Graph([self.w1, self.w2, self.b1, self.b2]) graph, m = self.execute_layer(x, y, graph) return graph.get_output(m)
def run(self, x, y=None): """ Runs the model for a batch of examples. The correct labels are known during training, but not at test time. When correct labels are available, `y` is a (batch_size x 10) numpy array. Each row in the array is a one-hot vector encoding the correct class. Your model should predict a (batch_size x 10) numpy array of scores, where higher scores correspond to greater probability of the image belonging to a particular class. You should use `nn.SoftmaxLoss` as your training loss. Inputs: x: a (batch_size x 784) numpy array y: a (batch_size x 10) numpy array, or None Output: (if y is not None) A nn.Graph instance, where the last added node is the loss (if y is None) A (batch_size x 10) numpy array of scores (aka logits) """ "*** YOUR CODE HERE ***" g = nn.Graph([self.w1, self.b1, self.w2, self.b2]) x1 = nn.MatrixMultiply(g, nn.Input(g, x), self.w1) x1_add_b1 = nn.MatrixVectorAdd(g, x1, self.b1) relu = nn.ReLU(g, x1_add_b1) x2 = nn.MatrixMultiply(g, relu, self.w2) x2_add_b2 = nn.MatrixVectorAdd(g, x2, self.b2) if y is not None: "*** YOUR CODE HERE ***" nn.SoftmaxLoss(g, x2_add_b2, nn.Input(g, y)) return g else: "*** YOUR CODE HERE ***" return g.get_output(x2_add_b2)
def train(self, dataset):
    """
    Trains the model.
    """
    "*** YOUR CODE HERE ***"
    learning_rate = 0.025
    while True:
        for x, y in dataset.iterate_once(self.batch_size):
            prediction = self.run(x)
            pred_loss = nn.SoftmaxLoss(prediction, y)
            fullParams = [
                self.w_init[0], self.w_init[1], self.w_hidden, self.w_final,
                self.b_init[0], self.b_init[1], self.b, self.b_final
            ]
            gradient = nn.gradients(pred_loss, fullParams)
            self.w_init[0].update(gradient[0], -1 * learning_rate)
            self.w_init[1].update(gradient[1], -1 * learning_rate)
            self.w_hidden.update(gradient[2], -1 * learning_rate)
            self.w_final.update(gradient[3], -1 * learning_rate)
            self.b_init[0].update(gradient[4], -1 * learning_rate)
            self.b_init[1].update(gradient[5], -1 * learning_rate)
            self.b.update(gradient[6], -1 * learning_rate)
            self.b_final.update(gradient[7], -1 * learning_rate)
        accuracy = dataset.get_validation_accuracy()
        print(accuracy)
        if accuracy > 0.85:
            break
        # Decay the learning rate as validation accuracy improves.
        if 0.75 < accuracy < 0.81:
            learning_rate = 0.01
        if accuracy >= 0.81:
            learning_rate = 0.005
        if accuracy > 0.83:
            learning_rate = 0.0007
    return prediction
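# The eight hand-written update calls above can be collapsed into one loop
# over fullParams, with identical behavior and the same update() API:
#
#     for param, grad in zip(fullParams, gradient):
#         param.update(grad, -learning_rate)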
def check_graph_basic(tracker):
    # First test with a basic graph. These tests are designed to pass even with
    # a broken gradient accumulator, so people can get started somewhere.
    import nn
    v1 = nn.Variable(1, 5)
    v1_data = np.ones_like(v1.data)
    v1.data = v1_data.copy()
    v2 = nn.Variable(1, 5)
    v2_data = np.ones_like(v2.data) / 5.0
    v2.data = v2_data.copy()
    graph = nn.Graph([v1, v2])

    g_nodes = graph.get_nodes()
    assert g_nodes is not None, "Graph.get_nodes returned None"
    assert g_nodes == [v1, v2], \
        "Graph.get_nodes on newly-constructed graph did not return the variables"
    assert graph.get_inputs(v1) is not None, "Graph.get_inputs returned None"
    assert graph.get_inputs(v2) is not None, "Graph.get_inputs returned None"
    assert graph.get_inputs(v1) == [], \
        "Graph.get_inputs should return no inputs for a Variable node"
    assert graph.get_inputs(v2) == [], \
        "Graph.get_inputs should return no inputs for a Variable node"
    assert graph.get_output(v1) is not None, "Graph.get_output returned None"
    assert graph.get_output(v2) is not None, "Graph.get_output returned None"
    assert np.allclose(graph.get_output(v1), v1_data), \
        "Graph.get_output for a Variable should be its data"
    assert np.allclose(graph.get_output(v2), v2_data), \
        "Graph.get_output for a Variable should be its data"

    loss = nn.SoftmaxLoss(graph, v1, v2)
    assert graph.get_nodes() == [v1, v2, loss], \
        "Not all nodes are present after adding a node"
    loss_inputs = graph.get_inputs(loss)
    loss_inputs_list = []
    try:
        loss_inputs_list = list(loss_inputs)
    except:
        pass
    assert len(loss_inputs_list) == 2, \
        "Graph.get_inputs for SoftmaxLoss node returned {}. " \
        "Expected: a length-2 list.".format(loss_inputs)
    assert np.allclose(v1.data, v1_data), \
        "Graph appears to have modified a Variable's data, even though step() has never been called"
    assert np.allclose(v2.data, v2_data), \
        "Graph appears to have modified a Variable's data, even though step() has never been called"
    for loss_input, data in zip(loss_inputs, [v1_data, v2_data]):
        assert (isinstance(loss_input, np.ndarray) and np.allclose(loss_input, data)), \
            "Graph.get_inputs returned wrong inputs for a SoftmaxLoss node"

    expected_loss = 1.60943791243
    numerical_loss = graph.get_output(loss)
    assert numerical_loss is not None, "Graph.get_output returned None"
    try:
        numerical_loss_float = float(numerical_loss)
    except:
        assert False, \
            "Graph.get_output for SoftmaxLoss returned {}. " \
            "Expected: a number".format(numerical_loss)
    assert np.isclose(numerical_loss_float, expected_loss), \
        "Graph.get_output for SoftmaxLoss was {}. Expected: {}".format(
            numerical_loss, expected_loss)

    graph.backprop()
    loss_grad = graph.get_gradient(loss)
    try:
        loss_grad_float = float(loss_grad)
    except:
        assert False, \
            "Graph.get_gradient for the loss node returned {}. " \
            "Expected: 1.0".format(loss_grad)
    assert np.isclose(loss_grad_float, 1.0), \
        "Graph.get_gradient for the loss node returned {}. " \
        "Expected: 1.0".format(loss_grad)
    assert np.asarray(loss_grad).dtype.kind == 'f', \
        "Graph.get_gradient for the loss node must return a floating point " \
        "number. (Did you return an integer?)".format(loss_grad, type(loss_grad))

    v1_grad = graph.get_gradient(v1)
    assert v1_grad is not None, "Graph.get_gradient returned None"
    assert v1_grad.shape == v1.data.shape, \
        "Graph.get_gradient returned gradient of wrong shape"
    v2_grad = graph.get_gradient(v2)
    assert v2_grad is not None, "Graph.get_gradient returned None"
    assert v2_grad.shape == v2.data.shape, \
        "Graph.get_gradient returned gradient of wrong shape"
    assert np.allclose(v1_grad, np.zeros_like(v1_grad)), \
        "Incorrect gradient after running Graph.backprop()"
    assert np.allclose(v2_grad, np.ones_like(v2_grad) * expected_loss), \
        "Incorrect gradient after running Graph.backprop()"
    assert np.allclose(v1.data, v1_data), \
        "Graph appears to have modified a Variable's data, even though step() has never been called"
    assert np.allclose(v2.data, v2_data), \
        "Graph appears to have modified a Variable's data, even though step() has never been called"

    graph.step(1.0)
    assert np.allclose(v1.data - v1_data, np.zeros_like(v1_grad)), \
        "Incorrect parameter update after running Graph.step()"
    assert np.allclose(v2.data - v2_data, np.ones_like(v2_grad) * -expected_loss), \
        "Incorrect parameter update after running Graph.step()"

    tracker.add_points(2)
def run(self, xs, y=None): """ TODO: Question 8 - [Application] Language Identification Runs the model for a batch of examples. Although words have different lengths, our data processing guarantees that within a single batch, all words will be of the same length (L). Here `xs` will be a list of length L. Each element of `xs` will be a (batch_size x self.num_chars) numpy array, where every row in the array is a one-hot vector encoding of a character. For example, if we have a batch of 8 three-letter words where the last word is "cat", we will have xs[1][7,0] == 1. Here the index 0 reflects the fact that the letter "a" is the inital (0th) letter of our combined alphabet for this task. The correct labels are known during training, but not at test time. When correct labels are available, `y` is a (batch_size x 5) numpy array. Each row in the array is a one-hot vector encoding the correct class. Your model should use a Recurrent Neural Network to summarize the list `xs` into a single node that represents a (batch_size x hidden_size) array, for your choice of hidden_size. It should then calculate a (batch_size x 5) numpy array of scores, where higher scores correspond to greater probability of the word originating from a particular language. You should use `nn.SoftmaxLoss` as your training loss. Inputs: xs: a list with L elements (one per character), where each element is a (batch_size x self.num_chars) numpy array y: a (batch_size x 5) numpy array, or None Output: (if y is not None) A nn.Graph instance, where the last added node is the loss (if y is None) A (batch_size x 5) numpy array of scores (aka logits) Hint: you may use the batch_size variable in your code """ batch_size = xs[0].shape[0] if not self.assign_var: self.W = [ nn.Variable(self.num_chars, self.hidden_size[0]) for _ in range(len(xs)) ] self.b = [ nn.Variable(self.hidden_size[0], self.hidden_size[0]) for _ in range(len(xs)) ] self.assign_var = True graph = nn.Graph( [self.W1, self.b1, self.W2, self.b2, self.W3, self.b3] + self.W + self.b) h = np.zeros((batch_size, self.hidden_size[0])) hin = nn.Input(graph, h) for n in range(len(xs)): input_x = nn.Input(graph, xs[n]) Wb = nn.MatrixMultiply(graph, input_x, self.W[n]) Wh = nn.MatrixMultiply(graph, hin, self.b[n]) Wb_Wh = nn.Add(graph, Wb, Wh) hin = nn.ReLU(graph, Wb_Wh) W1x = nn.MatrixMultiply(graph, hin, self.W1) W1b = nn.MatrixVectorAdd(graph, W1x, self.b1) W1Relu = nn.ReLU(graph, W1b) W2x = nn.MatrixMultiply(graph, W1Relu, self.W2) W2b = nn.MatrixVectorAdd(graph, W2x, self.b2) W2Relu = nn.ReLU(graph, W2b) W3x = nn.MatrixMultiply(graph, W2Relu, self.W3) W3b = nn.MatrixVectorAdd(graph, W3x, self.b3) yHat = W3b if y is not None: input_y = nn.Input(graph, y) Loss = nn.SoftmaxLoss(graph, yHat, input_y) return graph else: return graph.get_output(yHat)