Example #1
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 10) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Inputs:
            x: a (batch_size x 784) numpy array
            y: a (batch_size x 10) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
        """
        graph = nn.Graph([self.m, self.b, self.m2, self.b2])
        input_x = nn.Input(graph, x)
        #============= LAYER 01 ===============#
        xm = nn.MatrixMultiply(graph, input_x, self.m)
        xm_plus_b = nn.MatrixVectorAdd(graph, xm, self.b)
        #============= LAYER 02 ===============#
        relu = nn.ReLU(graph, xm_plus_b)
        xm2 = nn.MatrixMultiply(graph, relu, self.m2)
        xm_plus_b2 = nn.MatrixVectorAdd(graph, xm2, self.b2)

        if y is not None:
            input_y = nn.Input(graph, y)
            loss = nn.SoftmaxLoss(graph, xm_plus_b2, input_y)
            return graph
        else:
            return graph.get_output(xm_plus_b2)
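A note on how a `run` method in this graph-based `nn` API is typically consumed: training code asks for the graph with labels attached, runs backprop, and takes a gradient step (`Graph.backprop()` and `Graph.step()` are the calls exercised by the autograder tests in Examples #20 and #29 below). A minimal sketch, assuming a `model` exposing the `run` method above; the data variables here are hypothetical:

def train_epoch(model, train_x, train_y, batch_size=100, learning_rate=0.1):
    # One pass over the data; array shapes follow the docstring above.
    for start in range(0, train_x.shape[0], batch_size):
        x = train_x[start:start + batch_size]   # (batch_size x 784)
        y = train_y[start:start + batch_size]   # (batch_size x 10), one-hot rows
        graph = model.run(x, y)                 # last node added is the loss
        graph.backprop()                        # accumulate gradients
        graph.step(learning_rate)               # gradient step on each nn.Variable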
Example #2
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 10) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should predict a (batch_size x 10) numpy array of scores,
        where higher scores correspond to greater probability of the image
        belonging to a particular class. You should use `nn.SoftmaxLoss` as your
        training loss.

        Inputs:
            x: a (batch_size x 784) numpy array
            y: a (batch_size x 10) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
        """
        "*** YOUR CODE HERE ***"
        graph = nn.Graph([self.w1, self.b1, self.w2, self.b2, self.w3, self.b3])
        input_x = nn.Input(graph, x)
        xw_1 = nn.MatrixMultiply(graph, input_x, self.w1)
        xw1_plus_b1 = nn.MatrixVectorAdd(graph, xw_1, self.b1)
        relu_l1 = nn.ReLU(graph, xw1_plus_b1)
        l1w2 = nn.MatrixMultiply(graph, relu_l1, self.w2)
        l1w2_plus_b2 = nn.MatrixVectorAdd(graph, l1w2, self.b2)
        relu_l2 = nn.ReLU(graph, l1w2_plus_b2)
        l2w3 = nn.MatrixMultiply(graph, relu_l2, self.w3)
        l2w3_plus_b3 = nn.MatrixVectorAdd(graph, l2w3, self.b3)

        if y is not None:
            input_y = nn.Input(graph, y)
            loss = nn.SoftmaxLoss(graph, l2w3_plus_b3, input_y)
            return graph
        else:
            return graph.get_output(l2w3_plus_b3)
Example #3
    def run(self, xs, y=None):
        """
        Runs the model for a batch of examples.

        Although words have different lengths, our data processing guarantees
        that within a single batch, all words will be of the same length (L).

        Here `xs` will be a list of length L. Each element of `xs` will be a
        (batch_size x self.num_chars) numpy array, where every row in the array
        is a one-hot vector encoding of a character. For example, if we have a
        batch of 8 three-letter words where the last word is "cat", we will have
        xs[1][7,0] == 1. Here the index 0 reflects the fact that the letter "a"
        is the initial (0th) letter of our combined alphabet for this task.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 5) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should use a Recurrent Neural Network to summarize the list
        `xs` into a single node that represents a (batch_size x hidden_size)
        array, for your choice of hidden_size. It should then calculate a
        (batch_size x 5) numpy array of scores, where higher scores correspond
        to greater probability of the word originating from a particular
        language. You should use `nn.SoftmaxLoss` as your training loss.

        Inputs:
            xs: a list with L elements (one per character), where each element
                is a (batch_size x self.num_chars) numpy array
            y: a (batch_size x 5) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 5) numpy array of scores (aka logits)

        Hint: you may use the batch_size variable in your code
        """
        batch_size = xs[0].shape[0]

        "*** YOUR CODE HERE ***"
        h = nn.Variable(batch_size, self.dimensionality)
        h.data = np.zeros((batch_size, self.dimensionality))
        g = nn.Graph([h, self.w1, self.w2, self.w3, self.b])
        for x in xs:
            h1 = nn.MatrixMultiply(g, h, self.w1)
            x2 = nn.MatrixMultiply(g, nn.Input(g, x), self.w2)
            h1_add_x2 = nn.Add(g, h1, x2)
            add_b = nn.MatrixVectorAdd(g, h1_add_x2, self.b)
            relu = nn.ReLU(g, add_b)
            h = relu
        result = nn.MatrixMultiply(g, h, self.w3)

        if y is not None:
            "*** YOUR CODE HERE ***"
            nn.SoftmaxLoss(g, result, nn.Input(g, y))
            return g
        else:
            "*** YOUR CODE HERE ***"
            return g.get_output(result)
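The loop above builds the recurrence h_t = ReLU(h_{t-1}·W1 + x_t·W2 + b), followed by scores = h_L·W3. A numpy-only sketch of the same forward pass, useful for checking shapes (all sizes here are hypothetical; this illustrates the math, not the autodiff graph):

import numpy as np

batch_size, num_chars, hidden, L = 8, 47, 64, 3     # hypothetical sizes
xs = [np.zeros((batch_size, num_chars)) for _ in range(L)]
W1 = np.zeros((hidden, hidden))                     # hidden-to-hidden weights
W2 = np.zeros((num_chars, hidden))                  # input-to-hidden weights
b = np.zeros(hidden)
W3 = np.zeros((hidden, 5))                          # hidden-to-scores weights

h = np.zeros((batch_size, hidden))                  # initial hidden state
for x in xs:
    h = np.maximum(h @ W1 + x @ W2 + b, 0)          # h_t = ReLU(h W1 + x W2 + b)
scores = h @ W3                                     # (batch_size x 5) logits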
Example #4
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 10) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should predict a (batch_size x 10) numpy array of scores,
        where higher scores correspond to greater probability of the image
        belonging to a particular class. You should use `nn.SoftmaxLoss` as your
        training loss.

        Inputs:
            x: a (batch_size x 784) numpy array
            y: a (batch_size x 10) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
        """
        "*** YOUR CODE HERE ***"
        if len(x) == 1:
            return 0
        if not self.graph:
            w1 = nn.Variable(784, 500)
            w2 = nn.Variable(500, 500)
            w3 = nn.Variable(500, 10)
            b1 = nn.Variable(1, 500)
            b2 = nn.Variable(1, 500)
            b3 = nn.Variable(1, 10)
            self.l = [w1, w2, w3, b1, b2, b3]
        # a fresh graph is built on every call; the Variables persist across calls
        self.graph = nn.Graph(self.l)
        input_x = nn.Input(self.graph, x)  #Tx784
        if y is not None:  # guard: input_y can only be created when labels exist
            input_y = nn.Input(self.graph, y)
        mult = nn.MatrixMultiply(self.graph, input_x, self.l[0])  #Tx500
        add = nn.MatrixVectorAdd(self.graph, mult, self.l[3])
        relu = nn.ReLU(self.graph, add)
        mult2 = nn.MatrixMultiply(self.graph, relu, self.l[1])  #Tx500
        add2 = nn.MatrixVectorAdd(self.graph, mult2, self.l[4])  #Tx500
        relu2 = nn.ReLU(self.graph, add2)
        mult3 = nn.MatrixMultiply(self.graph, relu2, self.l[2])
        add3 = nn.MatrixVectorAdd(self.graph, mult3, self.l[5])  #Tx10
        if y is not None:
            # At training time, the correct output `y` is known.
            # Here, you should construct a loss node, and return the nn.Graph
            # that the node belongs to. The loss node must be the last node
            # added to the graph.
            loss = nn.SoftmaxLoss(self.graph, add3, input_y)
            return self.graph
        else:
            # At test time, the correct output is unknown.
            # You should instead return your model's prediction as a numpy array
            #print(self.graph.get_output(self.graph.get_nodes()[-1]))
            return self.graph.get_output(self.graph.get_nodes()[-1])
Example #5
    def run(self, x, y=None):
        """
        TODO: Question 6 - [Application] Digit Classification

        Runs the model for a batch of examples.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 10) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should predict a (batch_size x 10) numpy array of scores,
        where higher scores correspond to greater probability of the image
        belonging to a particular class. You should use `nn.SoftmaxLoss` as your
        training loss.

        Inputs:
            x: a (batch_size x 784) numpy array
            y: a (batch_size x 10) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
        """
        "*** YOUR CODE HERE ***"
        #to implement f(x) = relu(x.w1+b1).w2 + b2
        graph = nn.Graph([self.w1, self.b1, self.w2, self.b2])
        input_x = nn.Input(graph, x)
        #input_y = Input(graph, y)
        #a = x.w1
        a = nn.MatrixMultiply(graph, input_x, self.w1)
        #relu(a+b1).w2 + b2
        #b = a + b1
        b = nn.MatrixVectorAdd(graph, a, self.b1)
        #relu(b).w2 + b2
        two_layer_relu = nn.ReLU(graph, b)
        #c = relu(b).w2
        c = nn.MatrixMultiply(graph, two_layer_relu, self.w2)
        #d = c + b2
        d = nn.MatrixVectorAdd(graph, c, self.b2)

        #loss = SquareLoss(graph, xm_plus_b, input_y)

        if y is not None:
            "*** YOUR CODE HERE ***"
            # At training time, the correct output `y` is known.
            # Here, you should construct a loss node, and return the nn.Graph
            # that the node belongs to. The loss node must be the last node
            # added to the graph.
            "*** YOUR CODE HERE ***"
            input_y = nn.Input(graph, y)
            loss = nn.SoftmaxLoss(graph, d, input_y)
            return graph
        else:
            # At test time, the correct output is unknown.
            # You should instead return your model's prediction as a numpy array
            "*** YOUR CODE HERE ***"
            return graph.get_output(d)
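The block above computes f(x) = ReLU(x·w1 + b1)·w2 + b2. A standalone numpy sketch of the shapes flowing through it (the batch and hidden sizes are hypothetical):

import numpy as np

batch_size, hidden = 32, 100                      # hypothetical sizes
x = np.zeros((batch_size, 784))
w1, b1 = np.zeros((784, hidden)), np.zeros(hidden)
w2, b2 = np.zeros((hidden, 10)), np.zeros(10)

hidden_out = np.maximum(x @ w1 + b1, 0)           # ReLU(x w1 + b1): (32 x hidden)
logits = hidden_out @ w2 + b2                     # (32 x 10) scores for SoftmaxLoss
print(logits.shape)                               # (32, 10)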
Example #6
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 10) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should predict a (batch_size x 10) numpy array of scores,
        where higher scores correspond to greater probability of the image
        belonging to a particular class. You should use `nn.SoftmaxLoss` as your
        training loss.

        Inputs:
            x: a (batch_size x 784) numpy array
            y: a (batch_size x 10) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
        """
        "* YOUR CODE HERE *"
        graph = nn.Graph([
            self.weight1, self.bias1, self.weight2, self.bias2, self.weight3,
            self.bias3, self.weight4, self.bias4, self.weight5, self.bias5
        ])

        input_x = nn.Input(graph, x)

        xw1 = nn.MatrixMultiply(graph, input_x, self.weight1)
        plus1b1 = nn.MatrixVectorAdd(graph, xw1, self.bias1)
        relu1 = nn.ReLU(graph, plus1b1)

        relu1_2 = nn.MatrixMultiply(graph, relu1, self.weight2)
        plus2b2 = nn.MatrixVectorAdd(graph, relu1_2, self.bias2)
        relu2 = nn.ReLU(graph, plus2b2)

        relu2_3 = nn.MatrixMultiply(graph, relu2, self.weight3)
        plus3b3 = nn.MatrixVectorAdd(graph, relu2_3, self.bias3)
        relu3 = nn.ReLU(graph, plus3b3)

        relu3_4 = nn.MatrixMultiply(graph, relu3, self.weight4)
        plus4b4 = nn.MatrixVectorAdd(graph, relu3_4, self.bias4)
        relu4 = nn.ReLU(graph, plus4b4)

        relu4_5 = nn.MatrixMultiply(graph, relu4, self.weight5)
        plus5b5 = nn.MatrixVectorAdd(graph, relu4_5, self.bias5)

        if y is not None:
            "* YOUR CODE HERE *"
            input_y = nn.Input(graph, y)
            loss = nn.SoftmaxLoss(graph, plus5b5, input_y)
            return graph
        else:
            "* YOUR CODE HERE *"
            return graph.get_output(plus5b5)
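Five near-identical blocks like the ones above can be collapsed into a loop over (weight, bias) pairs, as Example #14 below does with parameter lists. A behavior-equivalent sketch of the middle of this `run` method, using the same attribute names:

        weights = [self.weight1, self.weight2, self.weight3, self.weight4, self.weight5]
        biases = [self.bias1, self.bias2, self.bias3, self.bias4, self.bias5]
        last = input_x
        for i, (w, b) in enumerate(zip(weights, biases)):
            affine = nn.MatrixVectorAdd(graph, nn.MatrixMultiply(graph, last, w), b)
            # ReLU after every layer except the last, which must emit raw scores
            last = affine if i == len(weights) - 1 else nn.ReLU(graph, affine)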
Example #7
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 10) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should predict a (batch_size x 10) numpy array of scores,
        where higher scores correspond to greater probability of the image
        belonging to a particular class. You should use `nn.SoftmaxLoss` as your
        training loss.

        Inputs:
            x: a (batch_size x 784) numpy array
            y: a (batch_size x 10) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
        """
        "*** YOUR CODE HERE ***"
        #print("x", x.shape)
        #print("y", y.shape)
        graph = nn.Graph(
            [self.W1, self.W2, self.W3, self.W4, self.W5, self.W6])
        input_x = nn.Input(graph, x)

        #first term
        xW1mult = nn.MatrixMultiply(graph, input_x, self.W1)
        #second term
        xW2mult = nn.MatrixMultiply(graph, input_x, self.W2)
        addW1W2 = nn.Add(graph, xW1mult, xW2mult)
        relu1 = nn.ReLU(graph, addW1W2)
        reluMult = nn.MatrixMultiply(graph, relu1, self.W3)

        xW4mult = nn.MatrixMultiply(graph, input_x, self.W4)
        W4W5mult = nn.MatrixMultiply(graph, xW4mult, self.W5)

        per2Add = nn.Add(graph, reluMult, W4W5mult)
        totalMult = nn.MatrixMultiply(graph, per2Add, self.W6)

        #another term

        #lastRelu = nn.ReLU(graph, totalMult)

        if y is not None:
            "*** YOUR CODE HERE ***"
            input_y = nn.Input(graph, y)
            loss_node = nn.SoftmaxLoss(graph, totalMult, input_y)
            return graph

        else:
            "*** YOUR CODE HERE ***"
            return graph.get_output(totalMult)
Example #8
File: models.py Project: alanzjl/CS188
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 10) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should predict a (batch_size x 10) numpy array of scores,
        where higher scores correspond to greater probability of the image
        belonging to a particular class. You should use `nn.SoftmaxLoss` as your
        training loss.

        Inputs:
            x: a (batch_size x 784) numpy array
            y: a (batch_size x 10) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
        """
        "*** YOUR CODE HERE ***"
        graph = nn.Graph([
            self.m0, self.b0, self.m1, self.b1, self.m2, self.b2, self.m3,
            self.b3, self.m4, self.b4, self.m5, self.b5
        ])
        input_x = nn.Input(graph, x)

        t = nn.MatrixMultiply(graph, input_x, self.m0)
        t = nn.MatrixVectorAdd(graph, t, self.b0)
        t = nn.ReLU(graph, t)
        t = nn.MatrixMultiply(graph, t, self.m1)
        t = nn.MatrixVectorAdd(graph, t, self.b1)
        t = nn.ReLU(graph, t)
        t = nn.MatrixMultiply(graph, t, self.m2)
        t = nn.MatrixVectorAdd(graph, t, self.b2)
        t = nn.ReLU(graph, t)
        t = nn.MatrixMultiply(graph, t, self.m3)
        t = nn.MatrixVectorAdd(graph, t, self.b3)
        t = nn.ReLU(graph, t)
        t = nn.MatrixMultiply(graph, t, self.m4)
        t = nn.MatrixVectorAdd(graph, t, self.b4)
        t = nn.ReLU(graph, t)
        t = nn.MatrixMultiply(graph, t, self.m5)
        t = nn.MatrixVectorAdd(graph, t, self.b5)

        if y is not None:
            "*** YOUR CODE HERE ***"
            input_y = nn.Input(graph, y)
            loss = nn.SoftmaxLoss(graph, t, input_y)
            return graph
        else:
            "*** YOUR CODE HERE ***"
            return graph.get_output(graph.get_nodes()[-1])
Example #9
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 10) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should predict a (batch_size x 10) numpy array of scores,
        where higher scores correspond to greater probability of the image
        belonging to a particular class. You should use `nn.SoftmaxLoss` as your
        training loss.

        Inputs:
            x: a (batch_size x 784) numpy array
            y: a (batch_size x 10) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
        """
        "*** YOUR CODE HERE ***"
        graph = nn.Graph(self.w1_list + self.b1_list + self.w2_list + self.b2_list + self.w3_list + self.b3_list)
        digit_losses = []
        for digit in range(0,10):
            xInput = nn.Input(graph,x)
            layer1 = nn.MatrixMultiply(graph, xInput, self.w1_list[digit])
            layer2 = nn.MatrixVectorAdd(graph, layer1, self.b1_list[digit])
            layer3 = nn.ReLU(graph, layer2)
            layer4 = nn.MatrixMultiply(graph, layer3, self.w2_list[digit])
            layer5 = nn.MatrixVectorAdd(graph, layer4, self.b2_list[digit])
            layer6 = nn.ReLU(graph, layer5)
            layer7 = nn.MatrixMultiply(graph, layer6, self.w3_list[digit])
            layer8 = nn.MatrixVectorAdd(graph, layer7, self.b3_list[digit])
            basis_vector = np.zeros((1, 10))
            basis_vector[0][digit] = 1
            basis_vector_input = nn.Input(graph, basis_vector)
            digit_losses.append(nn.MatrixMultiply(graph, layer8, basis_vector_input))
            if digit == 1:
                digit_losses_matrix = nn.Add(graph, digit_losses[0], digit_losses[1])
            if digit > 1:
                digit_losses_matrix = nn.Add(graph, digit_losses_matrix, digit_losses[digit])
 
        if y is not None:
            "*** YOUR CODE HERE ***"
            yInput = nn.Input(graph, y)
            soft_max_layer = nn.SoftmaxLoss(graph, digit_losses_matrix, yInput)
            return graph
        else:
            "*** YOUR CODE HERE ***"
            return graph.get_output(digit_losses_matrix)
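The basis-vector multiply in this example is a scatter: each per-digit network presumably ends in a (batch_size x 1) score (the shapes of `w3_list` are not shown), and multiplying by a one-hot (1 x 10) row places that score in column `digit`; adding the ten results assembles the full (batch_size x 10) score matrix. A numpy illustration:

import numpy as np

batch_size = 4
digit_scores = np.ones((batch_size, 1))        # hypothetical scores for digit 3
basis_vector = np.zeros((1, 10))
basis_vector[0][3] = 1
scattered = digit_scores @ basis_vector        # (4 x 10), nonzero only in column 3
print(scattered[0])                            # [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]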
Example #10
File: models.py Project: Karsten12/CS188
    def run(self, x, y=None):
        """
        TODO: Question 6 - [Application] Digit Classification

        Runs the model for a batch of examples.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 10) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should predict a (batch_size x 10) numpy array of scores,
        where higher scores correspond to greater probability of the image
        belonging to a particular class. You should use `nn.SoftmaxLoss` as your
        training loss.

        Inputs:
            x: a (batch_size x 784) numpy array
            y: a (batch_size x 10) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
        """
        "*** YOUR CODE HERE ***"
        hidden_layer_size = 200
        # Parameters are sized lazily from the first batch; note this assumes the
        # first call is a training call, since y is needed for the output sizes.
        if not self.w1:
            self.w1 = nn.Variable(x.shape[1], hidden_layer_size)
        if not self.w2:
            self.w2 = nn.Variable(hidden_layer_size, y.shape[1])
        if not self.b1:
            self.b1 = nn.Variable(hidden_layer_size)
        if not self.b2:
            self.b2 = nn.Variable(y.shape[1])

        g = nn.Graph([self.w1, self.w2, self.b1, self.b2])
        input_x = nn.Input(g, x)
        hidden = nn.ReLU(
            g, nn.MatrixVectorAdd(g, nn.MatrixMultiply(g, input_x, self.w1),
                                  self.b1))
        result = nn.MatrixVectorAdd(g, nn.MatrixMultiply(g, hidden, self.w2),
                                    self.b2)

        if y is not None:
            "*** YOUR CODE HERE ***"
            # Node constructors register themselves with the graph, so no
            # explicit add is needed; the loss becomes the last node.
            nn.SoftmaxLoss(g, result, nn.Input(g, y))
            return g
        else:
            "*** YOUR CODE HERE ***"
            return g.get_output(result)
Example #11
    def get_loss(self, x, y):
        """
        Computes the loss for a batch of examples.

        The correct labels `y` are represented as a node with shape
        (batch_size x 10). Each row is a one-hot vector encoding the correct
        digit class (0-9).

        Inputs:
            x: a node with shape (batch_size x 784)
            y: a node with shape (batch_size x 10)
        Returns: a loss node
        """
        "*** YOUR CODE HERE ***"
        return nn.SoftmaxLoss(self.run(x), y)
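Examples #11, #13, and #21 target a later revision of the `nn` API, in which `run` returns a node and losses, gradients, and updates live outside any explicit graph object. A minimal training-loop sketch against that API, using only the calls that appear in Example #28 below; `model.parameters` is a hypothetical attribute holding the model's parameter list:

def train(model, dataset, learning_rate=0.1, target_accuracy=0.97):
    while dataset.get_validation_accuracy() < target_accuracy:
        for x, y in dataset.iterate_once(model.batch_size):
            loss = model.get_loss(x, y)                   # loss node, as above
            grads = nn.gradients(loss, model.parameters)  # one gradient per parameter
            for param, grad in zip(model.parameters, grads):
                param.update(grad, -learning_rate)        # step against the gradient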
Example #12
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 10) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should predict a (batch_size x 10) numpy array of scores,
        where higher scores correspond to greater probability of the image
        belonging to a particular class. You should use `nn.SoftmaxLoss` as your
        training loss.

        Inputs:
            x: a (batch_size x 784) numpy array
            y: a (batch_size x 10) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
        """
        "*** YOUR CODE HERE ***"
        size = x.shape[1]

        if not self.w1:
            self.w1 = nn.Variable(size, 100)
        if not self.w2:
            self.w2 = nn.Variable(100, 10)
        if not self.b1:
            self.b1 = nn.Variable(100)
        if not self.b2:
            self.b2 = nn.Variable(10)

        graph = nn.Graph([self.w1, self.w2, self.b1, self.b2])

        input_x = nn.Input(graph, x)
        xw1 = nn.MatrixMultiply(graph, input_x, self.w1)
        xw1_b1 = nn.MatrixVectorAdd(graph, xw1, self.b1)
        relu = nn.ReLU(graph, xw1_b1)
        xw2 = nn.MatrixMultiply(graph, relu, self.w2)
        xw2_b2 = nn.MatrixVectorAdd(graph, xw2, self.b2)

        if y is not None:
            "*** YOUR CODE HERE ***"
            input_y = nn.Input(graph, y)
            loss = nn.SoftmaxLoss(graph, xw2_b2, input_y)
            return graph

        else:
            return graph.get_output(graph.get_nodes()[-1])
Example #13
    def get_loss(self, xs, y):
        """
        Computes the loss for a batch of examples.

        The correct labels `y` are represented as a node with shape
        (batch_size x 5). Each row is a one-hot vector encoding the correct
        language.

        Inputs:
            xs: a list with L elements (one per character), where each element
                is a node with shape (batch_size x self.num_chars)
            y: a node with shape (batch_size x 5)
        Returns: a loss node
        """
        "*** YOUR CODE HERE ***"
        return nn.SoftmaxLoss(self.run(xs), y)
Example #14
File: models.py Project: Kiranbrar/proj6
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 10) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should predict a (batch_size x 10) numpy array of scores,
        where higher scores correspond to greater probability of the image
        belonging to a particular class. You should use `nn.SoftmaxLoss` as your
        training loss.

        Inputs:
            x: a (batch_size x 784) numpy array
            y: a (batch_size x 10) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
        """
        "*** YOUR CODE HERE ***"

        graph = nn.Graph(self.param_w + self.param_b)
        inX = nn.Input(graph, x)
        last = inX

        for i in range(self.num_layers):
            multNode = nn.MatrixMultiply(graph, last, self.param_w[i])
            addNode = nn.MatrixVectorAdd(graph, multNode, self.param_b[i])
            if i != self.num_layers - 1:
                reluNode = nn.ReLU(graph, addNode)
                last = reluNode
            else:
                last = addNode
        if y is not None:
            # At training time, the correct output `y` is known.
            # Here, you should construct a loss node, and return the nn.Graph
            # that the node belongs to. The loss node must be the last node
            # added to the graph.
            inY = nn.Input(graph, y)
            loss = nn.SoftmaxLoss(graph, last, inY)
            return graph

        else:
            # At test time, the correct output is unknown.
            # You should instead return your model's prediction as a numpy array
            return graph.get_output(last)
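Example #14 assumes `self.param_w` and `self.param_b` were built as parallel lists in `__init__`. A plausible construction under hypothetical layer sizes (784 -> 300 -> 10):

        layer_sizes = [784, 300, 10]                  # hidden size is a guess
        self.num_layers = len(layer_sizes) - 1
        self.param_w = [nn.Variable(layer_sizes[i], layer_sizes[i + 1])
                        for i in range(self.num_layers)]
        self.param_b = [nn.Variable(layer_sizes[i + 1])
                        for i in range(self.num_layers)]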
Example #15
    def run(self, x, y=None):
        """
        TODO: Question 6 - [Application] Digit Classification

        Runs the model for a batch of examples.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 10) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should predict a (batch_size x 10) numpy array of scores,
        where higher scores correspond to greater probability of the image
        belonging to a particular class. You should use `nn.SoftmaxLoss` as your
        training loss.

        Inputs:
            x: a (batch_size x 784) numpy array
            y: a (batch_size x 10) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
        """
        "*** YOUR CODE HERE ***"
        if len(x) == 1:
            return 0

        if not self.graph:
            w1 = nn.Variable(784, 500)
            w2 = nn.Variable(500, 500)
            w3 = nn.Variable(500, 10)
            b1 = nn.Variable(1, 500)
            b2 = nn.Variable(1, 500)
            b3 = nn.Variable(1, 10)
            self.vars = [w1, w2, w3, b1, b2, b3]
        self.graph = nn.Graph(self.vars)
        input_x = nn.Input(self.graph, x)
        if y is not None:
            input_y = nn.Input(self.graph, y)
        add3 = add_three_edges(input_x, self.graph, self.vars)  # helper presumably defined elsewhere in this file

        if y is not None:
            "*** YOUR CODE HERE ***"
            loss = nn.SoftmaxLoss(self.graph, add3, input_y)
            return self.graph
        else:
            "*** YOUR CODE HERE ***"
            return self.graph.get_output(self.graph.get_nodes()[-1])
Example #16
    def run(self, x, y=None):
        """
        TODO: Question 6 - [Application] Digit Classification

        Runs the model for a batch of examples.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 10) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should predict a (batch_size x 10) numpy array of scores,
        where higher scores correspond to greater probability of the image
        belonging to a particular class. You should use `nn.SoftmaxLoss` as your
        training loss.

        Inputs:
            x: a (batch_size x 784) numpy array
            y: a (batch_size x 10) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
        """
        # "*** YOUR CODE HERE ***"
        graph = nn.Graph(
            [self.W1, self.b1, self.W2, self.b2, self.W3, self.b3])
        input_x = nn.Input(graph, x)
        # layer 1
        xm = nn.MatrixMultiply(graph, input_x, self.W1)
        xm_plus_b = nn.MatrixVectorAdd(graph, xm, self.b1)
        a1 = nn.ReLU(graph, xm_plus_b)
        # layer 2
        a1m = nn.MatrixMultiply(graph, a1, self.W2)
        a1m_plus_b = nn.MatrixVectorAdd(graph, a1m, self.b2)
        a2 = nn.ReLU(graph, a1m_plus_b)
        # layer 3
        a2m = nn.MatrixMultiply(graph, a2, self.W3)
        a2m_plus_b = nn.MatrixVectorAdd(graph, a2m, self.b3)

        if y is not None:
            # "*** YOUR CODE HERE ***"
            input_y = nn.Input(graph, y)
            loss = nn.SoftmaxLoss(graph, a2m_plus_b, input_y)
            return graph
        else:
            # "*** YOUR CODE HERE ***"
            return graph.get_output(a2m_plus_b)
Example #17
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 10) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should predict a (batch_size x 10) numpy array of scores,
        where higher scores correspond to greater probability of the image
        belonging to a particular class. You should use `nn.SoftmaxLoss` as your
        training loss.

        Inputs:
            x: a (batch_size x 784) numpy array
            y: a (batch_size x 10) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
        """
        # Implemented based on the equation posted by Yichi Zhang on Piazza:
        # f(x) = W2 * ReLU(W1 * x + b1) + b2
        # (the code below uses the row-vector convention: ReLU(x*W1 + b1)*W2 + b2)
        graph = nn.Graph([self.W1, self.b1, self.W2, self.b2])
        input_x = nn.Input(graph, x)
        W1_x = nn.MatrixMultiply(graph, input_x, self.W1)
        W1_x_plus_b1 = nn.MatrixVectorAdd(graph, W1_x, self.b1)
        relu = nn.ReLU(graph, W1_x_plus_b1)
        W2_relu = nn.MatrixMultiply(graph, relu, self.W2)
        W2_relu_plus_b2 = nn.MatrixVectorAdd(graph, W2_relu, self.b2)

        if y is not None:
            # At training time, the correct output `y` is known.
            # Here, you should construct a loss node, and return the nn.Graph
            # that the node belongs to. The loss node must be the last node
            # added to the graph.
            "*** YOUR CODE HERE ***"
            input_y = nn.Input(graph, y)
            W2_relu_plus_b2_loss = nn.SoftmaxLoss(graph, W2_relu_plus_b2,
                                                  input_y)
            return graph
        else:
            # At test time, the correct output is unknown.
            # You should instead return your model's prediction as a numpy array
            "*** YOUR CODE HERE ***"
            return graph.get_output(W2_relu_plus_b2)
Example #18
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 10) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should predict a (batch_size x 10) numpy array of scores,
        where higher scores correspond to greater probability of the image
        belonging to a particular class. You should use `nn.SoftmaxLoss` as your
        training loss.

        Inputs:
            x: a (batch_size x 784) numpy array
            y: a (batch_size x 10) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
        """
        "*** YOUR CODE HERE ***"
        graph = nn.Graph(self.variables)
        input_x = nn.Input(graph, x)
        xw1 = nn.MatrixMultiply(graph, input_x, self.variables[0])
        sumxw1b1 = nn.MatrixVectorAdd(graph, xw1, self.variables[1])
        relu = nn.ReLU(graph, sumxw1b1)
        reluW2 = nn.MatrixMultiply(graph, relu, self.variables[2])
        finalSum = nn.MatrixVectorAdd(graph, reluW2, self.variables[3])
        #relu2 = nn.ReLU(graph, sumRW2b2)
        #mul3 = nn.MatrixMultiply(graph, relu2, self.variables[4])
        #finalSum = nn.MatrixVectorAdd(graph, mul3, self.variables[5])

        if y is not None:
            "*** YOUR CODE HERE ***"
            input_y = nn.Input(graph, y)
            loss = nn.SoftmaxLoss(graph, finalSum, input_y)
            return graph

        else:
            "*** YOUR CODE HERE ***"
            nodes = graph.get_nodes()
            lastnode = nodes[-1]
            out = graph.get_output(lastnode)
            return out
Example #19
    def run(self, x, y=None):
        """
        TODO: Question 6 - [Application] Digit Classification

        Runs the model for a batch of examples.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 10) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should predict a (batch_size x 10) numpy array of scores,
        where higher scores correspond to greater probability of the image
        belonging to a particular class. You should use `nn.SoftmaxLoss` as your
        training loss.

        Inputs:
            x: a (batch_size x 784) numpy array
            y: a (batch_size x 10) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
        """

        graph = nn.Graph(
            [self.W1, self.b1, self.W2, self.b2, self.W3, self.b3])
        input_x = nn.Input(graph, x)
        W1x = nn.MatrixMultiply(graph, input_x, self.W1)
        W1b = nn.MatrixVectorAdd(graph, W1x, self.b1)
        W1Relu = nn.ReLU(graph, W1b)
        W2x = nn.MatrixMultiply(graph, W1Relu, self.W2)
        W2b = nn.MatrixVectorAdd(graph, W2x, self.b2)
        W2Relu = nn.ReLU(graph, W2b)
        W3x = nn.MatrixMultiply(graph, W2Relu, self.W3)
        W3b = nn.MatrixVectorAdd(graph, W3x, self.b3)
        yHat = W3b

        if y is not None:
            input_y = nn.Input(graph, y)
            Loss = nn.SoftmaxLoss(graph, yHat, input_y)
            return graph

        else:
            return graph.get_output(yHat)
Example #20
def check_graph_accumulator(tracker):
    # A more thorough test that now requires gradient accumulators to be working
    import nn

    v1 = nn.Variable(1, 5)
    v1_data = np.ones_like(v1.data) / 10
    v1.data = v1_data
    graph = nn.Graph([v1])
    adder = nn.Add(graph, v1, v1)
    assert graph.get_nodes() == [v1, adder], \
        "Not all nodes are present after adding a node."
    assert graph.get_inputs(v1) == [], \
        "Graph.get_inputs should return no inputs for a Variable node"
    assert np.allclose(graph.get_output(v1), v1_data), \
        "Graph.get_output for a Variable should be its data:\n{}\n" \
        "Student returned:\n{}".format(v1_data, graph.get_output(v1))
    expected = [graph.get_output(v1)] * 2
    student = graph.get_inputs(adder)
    for a, b in zip(student, expected):
        assert np.allclose(a, b), "Graph.get_inputs returned incorrect value for an Add node\nStudent returned:\n{}\n" \
                                  "Expected:\n{}".format(a, b)
    assert np.allclose(graph.get_output(adder), 2 * graph.get_output(v1)), \
        "Graph.get_output returned incorrect value for an Add node\nStudent returned:\n{}\nExpected:\n{}"\
        .format(graph.get_output(adder), 2 * graph.get_output(v1))
    loss = nn.SoftmaxLoss(graph, adder, adder)
    for node in [v1, adder]:
        output_shape = graph.get_output(node).shape
        node_grad = graph.get_gradient(node)
        assert node_grad is not None, \
            "Graph.get_gradient returned None, instead of an all-zero value"
        assert np.shape(node_grad) == output_shape, \
            "Graph.get_gradient returned gradient of wrong shape, {0}; expected, {1}".format(np.shape(node_grad),
                                                                                             output_shape)
        assert np.allclose(node_grad, np.zeros_like(node_grad)), "Graph.get_gradient should return all-zero values" \
                                                                 " before backprop is called, instead returned:\n{}"\
            .format(node_grad)

    expected_loss = 1.60943791243
    graph.backprop()
    v1_grad = graph.get_gradient(v1)
    assert np.allclose(v1_grad, np.ones_like(v1_grad) * expected_loss * 2), \
        "Incorrect gradient after running Graph.backprop().\nStudent returned:\n{}\nExpected:\n{}\nMake sure you are" \
        " correctly accumulating your gradients.".format(v1_grad, np.ones_like(v1_grad) * expected_loss * 2)
    tracker.add_points(3)
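The magic constant 1.60943791243 is ln(5): `adder` holds five equal entries (0.2), so its softmax is uniform over five classes, and with `adder` also serving as the label the cross-entropy is -Σ 0.2·ln(0.2) = ln 5. The expected v1 gradient is 2·ln 5 per entry because v1 feeds both arguments of the Add node, so its two gradient contributions must accumulate; that accumulation is exactly what this test checks. A quick numerical confirmation:

import numpy as np

logits = labels = np.full((1, 5), 0.2)            # the output of `adder`
probs = np.exp(logits) / np.exp(logits).sum()     # uniform: 0.2 per class
loss = -(labels * np.log(probs)).sum()
print(np.isclose(loss, np.log(5)))                # True

# d(loss)/d(adder) = (probs - labels) - log(probs) = 0 + ln(5) per entry,
# and d(adder)/d(v1) contributes once per Add argument, giving 2 * ln(5).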
Example #21
    def get_loss(self, x, y):
        """
        Computes the loss for a batch of examples.

        The correct labels `y` are represented as a node with shape
        (batch_size x 10). Each row is a one-hot vector encoding the correct
        digit class (0-9).
        FOR EXAMPLE: [0,0,0,0,0,1,0,0,0,0] WOULD BE THE y CORRESPONDING TO 5
                     [0,1,0,0,0,0,0,0,0,0] WOULD BE THE y CORRESPONDING TO 1

        IN THIS CASE THE PROBLEM IS MULTICLASS, SO YOU HAVE TO COMPUTE THE SOFTMAX LOSS
        Inputs:
            x: a node with shape (batch_size x 784)
            y: a node with shape (batch_size x 10)
        Returns: a loss node
        """
        "*** YOUR CODE HERE ***"  #NO ES NECESARIO QUE LO IMPLEMENTEIS, SE OS DA HECHO
        return nn.SoftmaxLoss(
            self.run(x),
            y)  # COMO VEIS LLAMA AL RUN PARA OBTENER POR CADA BATCH
Example #22
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 10) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should predict a (batch_size x 10) numpy array of scores,
        where higher scores correspond to greater probability of the image
        belonging to a particular class. You should use `nn.SoftmaxLoss` as your
        training loss.

        Inputs:
            x: a (batch_size x 784) numpy array
            y: a (batch_size x 10) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
        """

        #batch_size = x.shape[0]
        #num_pixels = x.shape[1]

        # set up the graph
        dcGraph = nn.Graph([self.W1, self.b1, self.W2, self.b2])
        input_x = nn.Input(dcGraph, x)
        xW1 = nn.MatrixMultiply(dcGraph, input_x, self.W1)
        xW1_plus_b1 = nn.MatrixVectorAdd(dcGraph, xW1, self.b1)
        ReLU_1 = nn.ReLU(dcGraph, xW1_plus_b1)
        R1W2 = nn.MatrixMultiply(dcGraph, ReLU_1, self.W2)
        R1W2_plus_b2 = nn.MatrixVectorAdd(dcGraph, R1W2, self.b2)

        if y is not None:
            input_y = nn.Input(dcGraph, y)
            R1W2_plus_b2_SML_y = nn.SoftmaxLoss(dcGraph, R1W2_plus_b2, input_y)
            return dcGraph
        else:
            return dcGraph.get_output(R1W2_plus_b2)
Example #23
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 10) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should predict a (batch_size x 10) numpy array of scores,
        where higher scores correspond to greater probability of the image
        belonging to a particular class. You should use `nn.SoftmaxLoss` as your
        training loss.

        Inputs:
            x: a (batch_size x 784) numpy array
            y: a (batch_size x 10) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
        """
        graph = nn.Graph([self.m1, self.b1, self.m2, self.b2])
        input_x = nn.Input(graph, x)
        xm = nn.MatrixMultiply(graph, input_x, self.m1)
        xm_plus_b = nn.MatrixVectorAdd(graph, xm, self.b1)
        hidden = nn.ReLU(graph, xm_plus_b)
        hidden_m = nn.MatrixMultiply(graph, hidden, self.m2)
        scores = nn.MatrixVectorAdd(graph, hidden_m, self.b2)

        if y is not None:
            input_y = nn.Input(graph, y)
            nn.SoftmaxLoss(graph, scores, input_y)
            return graph
        else:
            return graph.get_output(scores)
Example #24
    def run(self, x, y=None):
        """
        TODO: Question 6 - [Application] Digit Classification

        Runs the model for a batch of examples.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 10) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should predict a (batch_size x 10) numpy array of scores,
        where higher scores correspond to greater probability of the image
        belonging to a particular class. You should use `nn.SoftmaxLoss` as your
        training loss.

        Inputs:
            x: a (batch_size x 784) numpy array
            y: a (batch_size x 10) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
        """
        self.graph = nn.Graph([self.w1, self.w2, self.b1, self.b2])
        input_x = nn.Input(self.graph, x)

        xm1 = nn.MatrixMultiply(self.graph, input_x, self.w1)
        xm1_plus_b1 = nn.MatrixVectorAdd(self.graph, xm1, self.b1)
        relu = nn.ReLU(self.graph, xm1_plus_b1)
        reluw2 = nn.MatrixMultiply(self.graph, relu, self.w2)
        reluw2_plus_b2 = nn.MatrixVectorAdd(self.graph, reluw2, self.b2)

        if y is not None:
            input_y = nn.Input(self.graph, y)
            loss = nn.SoftmaxLoss(self.graph, reluw2_plus_b2, input_y)
            return self.graph
        else:
            return self.graph.get_output(self.graph.get_nodes()[-1])
Example #25
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 10) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should predict a (batch_size x 10) numpy array of scores,
        where higher scores correspond to greater probability of the image
        belonging to a particular class. You should use `nn.SoftmaxLoss` as your
        training loss.

        Inputs:
            x: a (batch_size x 784) numpy array
            y: a (batch_size x 10) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
        """
        "*** YOUR CODE HERE ***"
        graph = nn.Graph([self.w1, self.b, self.w2, self.b1])
        input_x = nn.Input(graph, x)
        xm_1 = nn.MatrixMultiply(graph, input_x, self.w1)
        add1 = nn.MatrixVectorAdd(graph, xm_1, self.b)
        hidden_output = nn.ReLU(graph, add1)
        mul2 = nn.MatrixMultiply(graph, hidden_output, self.w2)
        add2 = nn.MatrixVectorAdd(graph, mul2, self.b1)

        if y is not None:
            "*** YOUR CODE HERE ***"
            inputY = nn.Input(graph, y)
            loss = nn.SoftmaxLoss(graph, add2, inputY)
            return graph
        else:
            "*** YOUR CODE HERE ***"
            return graph.get_output(add2)
Example #26
    def run(self, x, y=None):
        """
        TODO: Question 6 - [Application] Digit Classification

        Runs the model for a batch of examples.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 10) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should predict a (batch_size x 10) numpy array of scores,
        where higher scores correspond to greater probability of the image
        belonging to a particular class. You should use `nn.SoftmaxLoss` as your
        training loss.

        Inputs:
            x: a (batch_size x 784) numpy array
            y: a (batch_size x 10) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
        """
        "*** YOUR CODE HERE ***"

        if y is not None:
            "*** YOUR CODE HERE ***"
            graph = nn.Graph([self.w1, self.w2, self.b1, self.b2])
            input_y = nn.Input(graph, y)
            graph, m = self.execute_layer(x, y, graph)
            loss = nn.SoftmaxLoss(graph, m, input_y)
            return graph
        else:
            "*** YOUR CODE HERE ***"
            graph = nn.Graph([self.w1, self.w2, self.b1, self.b2])
            graph, m = self.execute_layer(x, y, graph)
            return graph.get_output(m)
Example #27
    def run(self, x, y=None):
        """
        Runs the model for a batch of examples.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 10) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should predict a (batch_size x 10) numpy array of scores,
        where higher scores correspond to greater probability of the image
        belonging to a particular class. You should use `nn.SoftmaxLoss` as your
        training loss.

        Inputs:
            x: a (batch_size x 784) numpy array
            y: a (batch_size x 10) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 10) numpy array of scores (aka logits)
        """
        "*** YOUR CODE HERE ***"
        g = nn.Graph([self.w1, self.b1, self.w2, self.b2])
        x1 = nn.MatrixMultiply(g, nn.Input(g, x), self.w1)
        x1_add_b1 = nn.MatrixVectorAdd(g, x1, self.b1)
        relu = nn.ReLU(g, x1_add_b1)
        x2 = nn.MatrixMultiply(g, relu, self.w2)
        x2_add_b2 = nn.MatrixVectorAdd(g, x2, self.b2)

        if y is not None:
            "*** YOUR CODE HERE ***"
            nn.SoftmaxLoss(g, x2_add_b2, nn.Input(g, y))
            return g
        else:
            "*** YOUR CODE HERE ***"
            return g.get_output(x2_add_b2)
Example #28
    def train(self, dataset):
        """
        Trains the model.
        """
        "*** YOUR CODE HERE ***"
        learning_rate = 0.025
        while True:
            for x, y in dataset.iterate_once(self.batch_size):
                prediction = self.run(x)
                pred_loss = nn.SoftmaxLoss(prediction, y)
                fullParams = [
                    self.w_init[0], self.w_init[1], self.w_hidden,
                    self.w_final, self.b_init[0], self.b_init[1], self.b,
                    self.b_final
                ]
                gradient = nn.gradients(pred_loss, fullParams)
                self.w_init[0].update(gradient[0], -1 * learning_rate)
                self.w_init[1].update(gradient[1], -1 * learning_rate)
                self.w_hidden.update(gradient[2], -1 * learning_rate)
                self.w_final.update(gradient[3], -1 * learning_rate)
                self.b_init[0].update(gradient[4], -1 * learning_rate)
                self.b_init[1].update(gradient[5], -1 * learning_rate)
                self.b.update(gradient[6], -1 * learning_rate)
                self.b_final.update(gradient[7], -1 * learning_rate)
            accuracy = dataset.get_validation_accuracy()
            print(accuracy)
            if accuracy > 0.85:
                break
            if 0.75 < accuracy < 0.81:
                learning_rate = 0.01
            if accuracy >= 0.81:
                learning_rate = 0.005
            if accuracy > 0.83:
                learning_rate = 0.0007
        return prediction
Example #29
def check_graph_basic(tracker):
    # First test with a basic graph. These tests are designed to pass even with
    # a broken gradient accumulator, so people can get started somewhere.
    import nn

    v1 = nn.Variable(1, 5)
    v1_data = np.ones_like(v1.data)
    v1.data = v1_data.copy()

    v2 = nn.Variable(1, 5)
    v2_data = np.ones_like(v2.data) / 5.0
    v2.data = v2_data.copy()

    graph = nn.Graph([v1, v2])

    g_nodes = graph.get_nodes()
    assert g_nodes is not None, "Graph.get_nodes returned None"
    assert g_nodes == [
        v1, v2
    ], "Graph.get_nodes on newly-constructed graph did not return the variables"
    assert graph.get_inputs(v1) is not None, "Graph.get_inputs returned None"
    assert graph.get_inputs(v2) is not None, "Graph.get_inputs returned None"
    assert graph.get_inputs(
        v1
    ) == [], "Graph.get_inputs should return no inputs for a Variable node"
    assert graph.get_inputs(
        v2
    ) == [], "Graph.get_inputs should return no inputs for a Variable node"

    assert graph.get_output(v1) is not None, "Graph.get_output returned None"
    assert graph.get_output(v2) is not None, "Graph.get_output returned None"
    assert np.allclose(
        graph.get_output(v1),
        v1_data), "Graph.get_output for a Variable should be its data"
    assert np.allclose(
        graph.get_output(v2),
        v2_data), "Graph.get_output for a Variable should be its data"

    loss = nn.SoftmaxLoss(graph, v1, v2)
    assert graph.get_nodes() == [v1, v2, loss],\
        "Not all nodes are present after adding a node"

    loss_inputs = graph.get_inputs(loss)
    loss_inputs_list = []
    try:
        loss_inputs_list = list(loss_inputs)
    except:
        pass
    assert len(loss_inputs_list) == 2,\
        "Graph.get_inputs for SoftmaxLoss node returned {}. Expected: a length-2 list.".format(loss_inputs)
    assert np.allclose(v1.data, v1_data),\
        "Graph appears to have modified a Variable's data, even though step() has never been called"
    assert np.allclose(v2.data, v2_data),\
        "Graph appears to have modified a Variable's data, even though step() has never been called"
    for loss_input, data in zip(loss_inputs, [v1_data, v2_data]):
        assert (isinstance(loss_input, np.ndarray)
                and np.allclose(loss_input, data)),\
                "Graph.get_inputs returned wrong inputs for a SoftmaxLoss node"

    expected_loss = 1.60943791243
    numerical_loss = graph.get_output(loss)
    assert numerical_loss is not None, "Graph.get_output returned None"
    try:
        numerical_loss_float = float(numerical_loss)
    except:
        assert False,\
            "Graph.get_output for SoftmaxLoss returned {}. Expected: a number".format(numerical_loss)
    assert np.isclose(numerical_loss_float, expected_loss),\
        "Graph.get_output for SoftmaxLoss was {}. Expected: {}".format(numerical_loss, expected_loss)

    graph.backprop()

    loss_grad = graph.get_gradient(loss)
    try:
        loss_grad_float = float(loss_grad)
    except:
        assert False,\
            "Graph.get_gradient for the loss node returned {}. Expected: 1.0".format(loss_grad)
    assert np.isclose(loss_grad_float, 1.0),\
        "Graph.get_gradient for the loss node returned {}. Expected: 1.0".format(loss_grad)
    assert np.asarray(loss_grad).dtype.kind == 'f',\
        "Graph.get_gradient for the loss node must return a floating point number, " \
        "got {} of type {}. (Did you return an integer?)".format(loss_grad, type(loss_grad))

    v1_grad = graph.get_gradient(v1)
    assert v1_grad is not None, "Graph.get_gradient returned None"
    assert v1_grad.shape == v1.data.shape,\
        "Graph.get_gradient returned gradient of wrong shape"

    v2_grad = graph.get_gradient(v2)
    assert v2_grad is not None, "Graph.get_gradient returned None"
    assert v2_grad.shape == v2.data.shape,\
        "Graph.get_gradient returned gradient of wrong shape"

    assert np.allclose(v1_grad, np.zeros_like(v1_grad)),\
        "Incorrect gradient after running Graph.backprop()"
    assert np.allclose(v2_grad, np.ones_like(v2_grad) * expected_loss),\
        "Incorrect gradient after running Graph.backprop()"
    assert np.allclose(v1.data, v1_data),\
        "Graph appears to have modified a Variable's data, even though step() has never been called"
    assert np.allclose(v2.data, v2_data),\
        "Graph appears to have modified a Variable's data, even though step() has never been called"
    graph.step(1.0)
    assert np.allclose(v1.data - v1_data, np.zeros_like(v1_grad)),\
        "Incorrect parameter update after running Graph.step()"
    assert np.allclose(v2.data - v2_data,
                       np.ones_like(v2_grad) * -expected_loss),\
        "Incorrect parameter update after running Graph.step()"

    tracker.add_points(2)
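The expected values here also follow from the softmax cross-entropy derivative: with v1 all ones, softmax(v1) is uniform (0.2 per class), so the loss is -Σ 0.2·ln 0.2 = ln 5 ≈ 1.60943791243; the gradient with respect to the logits v1 is softmax(v1) - v2 = 0, and with respect to the labels v2 it is -ln softmax(v1) = ln 5 per entry, matching the assertions above. A quick check:

import numpy as np

v1 = np.ones((1, 5))                       # logits
v2 = np.full((1, 5), 0.2)                  # labels
probs = np.exp(v1) / np.exp(v1).sum()      # uniform 0.2
print(-(v2 * np.log(probs)).sum())         # 1.6094379124341003 == ln(5)
print(probs - v2)                          # gradient w.r.t. v1: all zeros
print(-np.log(probs))                      # gradient w.r.t. v2: ln(5) everywhere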
Example #30
    def run(self, xs, y=None):
        """
        TODO: Question 8 - [Application] Language Identification

        Runs the model for a batch of examples.

        Although words have different lengths, our data processing guarantees
        that within a single batch, all words will be of the same length (L).

        Here `xs` will be a list of length L. Each element of `xs` will be a
        (batch_size x self.num_chars) numpy array, where every row in the array
        is a one-hot vector encoding of a character. For example, if we have a
        batch of 8 three-letter words where the last word is "cat", we will have
        xs[1][7,0] == 1. Here the index 0 reflects the fact that the letter "a"
        is the initial (0th) letter of our combined alphabet for this task.

        The correct labels are known during training, but not at test time.
        When correct labels are available, `y` is a (batch_size x 5) numpy
        array. Each row in the array is a one-hot vector encoding the correct
        class.

        Your model should use a Recurrent Neural Network to summarize the list
        `xs` into a single node that represents a (batch_size x hidden_size)
        array, for your choice of hidden_size. It should then calculate a
        (batch_size x 5) numpy array of scores, where higher scores correspond
        to greater probability of the word originating from a particular
        language. You should use `nn.SoftmaxLoss` as your training loss.

        Inputs:
            xs: a list with L elements (one per character), where each element
                is a (batch_size x self.num_chars) numpy array
            y: a (batch_size x 5) numpy array, or None
        Output:
            (if y is not None) A nn.Graph instance, where the last added node is
                the loss
            (if y is None) A (batch_size x 5) numpy array of scores (aka logits)

        Hint: you may use the batch_size variable in your code
        """
        batch_size = xs[0].shape[0]

        if not self.assign_var:
            # Per-timestep weights are created once, sized by the first batch's
            # word length; this assumes no later batch contains longer words.
            self.W = [
                nn.Variable(self.num_chars, self.hidden_size[0])
                for _ in range(len(xs))
            ]
            self.b = [
                nn.Variable(self.hidden_size[0], self.hidden_size[0])
                for _ in range(len(xs))
            ]
            self.assign_var = True

        graph = nn.Graph(
            [self.W1, self.b1, self.W2, self.b2, self.W3, self.b3] + self.W +
            self.b)
        h = np.zeros((batch_size, self.hidden_size[0]))
        hin = nn.Input(graph, h)
        for n in range(len(xs)):
            input_x = nn.Input(graph, xs[n])
            Wb = nn.MatrixMultiply(graph, input_x, self.W[n])
            Wh = nn.MatrixMultiply(graph, hin, self.b[n])
            Wb_Wh = nn.Add(graph, Wb, Wh)
            hin = nn.ReLU(graph, Wb_Wh)
        W1x = nn.MatrixMultiply(graph, hin, self.W1)
        W1b = nn.MatrixVectorAdd(graph, W1x, self.b1)
        W1Relu = nn.ReLU(graph, W1b)
        W2x = nn.MatrixMultiply(graph, W1Relu, self.W2)
        W2b = nn.MatrixVectorAdd(graph, W2x, self.b2)
        W2Relu = nn.ReLU(graph, W2b)
        W3x = nn.MatrixMultiply(graph, W2Relu, self.W3)
        W3b = nn.MatrixVectorAdd(graph, W3x, self.b3)
        yHat = W3b

        if y is not None:
            input_y = nn.Input(graph, y)
            Loss = nn.SoftmaxLoss(graph, yHat, input_y)
            return graph

        else:
            return graph.get_output(yHat)