    def create_embeddings(self, word_embeddings):
        """Create embeddings that map word, tag, and deprels to vectors

        Embedding layers convert sparse ID representations to dense vector
        representations.

         - Create 3 embedding (2D) tensors, one for each of the input types.
           Input values index the rows of the matrices to extract. The
           (exclusive) max bound on the values in the input can be found in
           {n_word_ids, n_tag_ids, n_deprel_ids}.
         - The word embedding tensor should be initialized with the value of
           the argument word_embeddings; the other two matrices should be
           initialized using the He initializer you implemented.
         - Assign the tensors to self as attributes:
            self.word_embeddings
            self.tag_embeddings
            self.deprel_embeddings
           (Don't change the variable names!)
         - Make sure that gradient recording is enabled for all three embedding
           matrices (see the PyTorch tutorials for more details).

        Args:
            word_embeddings:
                numpy.ndarray of shape (n_word_ids, embed_size) representing
                matrix of pre-trained word embeddings
        """
        # *** BEGIN YOUR CODE ***
        # Pre-trained word embeddings, copied into a tensor with gradient recording enabled
        self.word_embeddings = torch.tensor(word_embeddings, requires_grad=True)
        # Tag and deprel embeddings are He-initialized, with gradient recording enabled
        self.tag_embeddings = he_initializer((self.config.n_tag_ids, self.config.embed_size)).requires_grad_(True)
        self.deprel_embeddings = he_initializer((self.config.n_deprel_ids, self.config.embed_size)).requires_grad_(True)
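        # Hedged usage sketch (not required by this method): elsewhere in the model,
        # batched ID tensors presumably index rows of these matrices directly, e.g.
        #     embedded_words = self.word_embeddings[word_id_batch]
        # where word_id_batch is an assumed name for a (batch_size, n_word_features)
        # tensor of word IDs, giving a (batch_size, n_word_features, embed_size) result.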
    def create_weights_biases(self):
        """Create layer weights and biases for this neural network

        In our single-hidden-layer neural network, our predictions are computed
        as follows from the concatenated embedded input x:
            h = Relu(x W_h + b_h)
            h_drop = Dropout(h, dropout_rate)
            pred = h_drop W_o + b_o
        This method creates the weights and biases W_h, b_h, W_o, and b_o.

        Note that we are not applying a softmax to pred. The softmax will
        instead be done in the get_loss function, which improves efficiency
        because we can use torch.nn.functional.cross_entropy.
        Excluding the softmax from pred won't change which transition is
        predicted, since softmax preserves the ordering of the scores.

         - Create the tensors mentioned above with the following dimensions:
            W_h: (N * embed_size, hidden_size)
            b_h: (hidden_size,)
            W_o: (hidden_size, n_classes)
            b_o: (n_classes,)
           where N = n_word_features + n_tag_features + n_deprel_features
         - Weight matrices should be initialized with the He initializer you
           implemented; bias vectors should be initialized to zeros.
         - Assign the weights and biases to self as attributes:
            self.W_h
            self.b_h
            self.W_o
            self.b_o
           (Don't change the variable names!)
         - Make sure that gradient recording is enabled for all of the weight
           and bias tensors (see the PyTorch tutorials for more details).
        """
        # *** BEGIN YOUR CODE ***
        N = self.config.n_word_features + self.config.n_tag_features + self.config.n_deprel_features

        # Hidden-layer weight matrix: He-initialized, gradient recording enabled
        self.W_h = he_initializer(
            (N * self.config.embed_size, self.config.hidden_size)).requires_grad_(True)

        # Hidden-layer bias vector: zero-initialized
        self.b_h = torch.zeros(self.config.hidden_size, requires_grad=True)

        # Output-layer weight matrix: He-initialized, gradient recording enabled
        self.W_o = he_initializer(
            (self.config.hidden_size, self.config.n_classes)).requires_grad_(True)

        # Output-layer bias vector: zero-initialized
        self.b_o = torch.zeros(self.config.n_classes, requires_grad=True)
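
        # Hedged sketch of how the forward pass presumably consumes these tensors,
        # following the formulas in the docstring (x, self.config.dropout_rate, and
        # the training flag are assumed names, not defined in this method):
        #     h = torch.relu(x @ self.W_h + self.b_h)
        #     h_drop = torch.nn.functional.dropout(h, p=self.config.dropout_rate, training=True)
        #     pred = h_drop @ self.W_o + self.b_o   # raw logits, no softmax
        # get_loss can then call torch.nn.functional.cross_entropy(pred, labels),
        # which applies log-softmax and negative log-likelihood in one step.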