def test_training():
    x = nd.ones((10, 10))
    with train_section():
        y = nd.Dropout(x, p=0.5)
        assert not (y.asnumpy() == x.asnumpy()).all()
        with test_section():
            y = nd.Dropout(x, p=0.5)
            assert (y.asnumpy() == x.asnumpy()).all()
def test_training():
    x = nd.ones((10, 10))
    with record():
        y = nd.Dropout(x, p=0.5)
        assert not (y.asnumpy() == x.asnumpy()).all()
        with pause():
            y = nd.Dropout(x, p=0.5)
            assert (y.asnumpy() == x.asnumpy()).all()
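The two tests above are the same check written against two generations of the MXNet autograd API: the early train_section()/test_section() scopes and the later record()/pause() ones. On current MXNet 1.x the equivalent scopes are autograd.train_mode() and autograd.predict_mode(); a minimal sketch (kept units are rescaled to 1/(1-p) = 2, so the training-mode output never equals the input):

from mxnet import nd, autograd

x = nd.ones((10, 10))
with autograd.train_mode():      # dropout active: entries become 0 or 2
    y = nd.Dropout(x, p=0.5)
    assert not (y.asnumpy() == x.asnumpy()).all()
with autograd.predict_mode():    # dropout is a no-op at inference
    y = nd.Dropout(x, p=0.5)
    assert (y.asnumpy() == x.asnumpy()).all()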
Example #3
def biLSTM(f_lstm, b_lstm, inputs, batch_size=None, dropout_x=0., dropout_h=0.):
    """Feature extraction through BiLSTM

    Parameters
    ----------
    f_lstm : VariationalDropoutCell
        Forward cell
    b_lstm : VariationalDropoutCell
        Backward cell
    inputs : NDArray
        seq_len x batch_size
    dropout_x : float
        Variational dropout on inputs
    dropout_h :
        Not used

    Returns
    -------
    outputs : NDArray
        Outputs of BiLSTM layers, seq_len x 2 hidden_dims x batch_size
    """
    for f, b in zip(f_lstm, b_lstm):
        inputs = nd.Dropout(inputs, dropout_x, axes=[0])  # important for variational dropout
        fo, fs = f.unroll(length=inputs.shape[0], inputs=inputs, layout='TNC', merge_outputs=True)
        bo, bs = b.unroll(length=inputs.shape[0], inputs=inputs.flip(axis=0), layout='TNC', merge_outputs=True)
        f.reset()
        b.reset()
        inputs = nd.concat(fo, bo.flip(axis=0), dim=2)
    return inputs
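A hypothetical usage sketch for biLSTM: the original passes lists of VariationalDropoutCell (one per layer); a plain gluon LSTMCell stands in here just to show the call and the output shape.

from mxnet import nd
from mxnet.gluon import rnn

seq_len, batch_size, input_dim, hidden = 5, 2, 8, 16
f_cells, b_cells = [rnn.LSTMCell(hidden)], [rnn.LSTMCell(hidden)]
for cell in f_cells + b_cells:
    cell.initialize()  # input size is inferred on first call

x = nd.random.uniform(shape=(seq_len, batch_size, input_dim))
out = biLSTM(f_cells, b_cells, x, dropout_x=0.1)
print(out.shape)  # (seq_len, batch_size, 2 * hidden)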
Example #4
    def forward(self, x):
        # F is assumed to be mxnet.ndarray (e.g. imported as: from mxnet import nd as F)
        x = self.fc2(x)
        x = F.relu(x)
        x = F.Dropout(x, p=0.5)  # p defaults to 0.5 when omitted
        x = self.fc3(x)
        return x
Example #5
    def network(X, dropout=0.0):
        # encoder
        H1 = nd.Activation(nd.FullyConnected(data=X, weight=W1, bias=B1, num_hidden=num_hidden1), act_type="sigmoid")
        H1 = nd.Dropout(data=H1, p=dropout)  # apply dropout
        H2 = nd.Activation(nd.FullyConnected(data=H1, weight=W2, bias=B2, num_hidden=num_hidden2), act_type="sigmoid")
        H2 = nd.Dropout(data=H2, p=dropout)  # apply dropout

        # decoder
        H3 = nd.Activation(nd.FullyConnected(data=H2, weight=W3, bias=B3, num_hidden=num_hidden1_), act_type="sigmoid")
        H3 = nd.Dropout(data=H3, p=dropout)  # apply dropout
        H4 = nd.Activation(nd.FullyConnected(data=H3, weight=W4, bias=B4, num_hidden=num_hidden2_), act_type="sigmoid")
        H4 = nd.Dropout(data=H4, p=dropout)  # apply dropout
        H5 = nd.Activation(nd.FullyConnected(data=H4, weight=W5, bias=B5, num_hidden=num_outputs), act_type="sigmoid")
        return H5
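The W*/B* weights and num_hidden* sizes are module-level names in the original project; a hypothetical setup (all dimensions are assumptions, e.g. flattened MNIST with a symmetric 784-200-100-100-200-784 autoencoder):

from mxnet import nd

num_inputs = 784                       # assumed: flattened MNIST
num_hidden1, num_hidden2 = 200, 100    # encoder widths (assumed)
num_hidden1_, num_hidden2_ = 100, 200  # decoder widths (assumed)
num_outputs = num_inputs

def param(n_out, n_in):
    return nd.random.normal(scale=0.01, shape=(n_out, n_in)), nd.zeros(n_out)

W1, B1 = param(num_hidden1, num_inputs)
W2, B2 = param(num_hidden2, num_hidden1)
W3, B3 = param(num_hidden1_, num_hidden2)
W4, B4 = param(num_hidden2_, num_hidden1_)
W5, B5 = param(num_outputs, num_hidden2_)

X = nd.random.uniform(shape=(8, num_inputs))
print(network(X, dropout=0.2).shape)  # (8, 784)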
Example #6
    def hybrid_forward(self, F, x):
        x = self.conv_stem(x)
        x = self.bn1(x)
        x = self.act1(x)
        x = self.blocks(x)
        x = self.global_pool(x)
        x = self.conv_head(x)
        x = self.act2(x)

        if self.dropout > 0.:
            x = F.Dropout(x, p=self.dropout, mode='training')
        x = self.classifier(x)
        return x
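Example #6 passes mode='training' explicitly. In MXNet's Dropout, mode='training' (the default) drops units only inside an autograd training scope, while mode='always' drops unconditionally; a quick demonstration:

from mxnet import nd, autograd

x = nd.ones((4, 4))
print(nd.Dropout(x, p=0.5).asnumpy())                 # identity outside training
print(nd.Dropout(x, p=0.5, mode='always').asnumpy())  # drops and rescales anywhere
with autograd.train_mode():
    print(nd.Dropout(x, p=0.5).asnumpy())             # drops in training scope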
Example #7
    def network(X, drop_rate=0.0):  # formula: output_size = ((input - kernel + 2*padding) / stride) + 1
        # data size
        # MNIST, FashionMNIST = (batch size, 1, 28, 28)
        # CIFAR = (batch size, 3, 32, 32)

        C_H1 = nd.Activation(data=nd.Convolution(data=X, weight=W1, bias=B1, kernel=(3, 3), stride=(1, 1), num_filter=60), act_type="relu")  # MNIST: (batch size, 60, 26, 26), CIFAR10: (batch size, 60, 30, 30)
        P_H1 = nd.Pooling(data=C_H1, pool_type="max", kernel=(2, 2), stride=(2, 2))  # MNIST: (batch size, 60, 13, 13), CIFAR10: (batch size, 60, 15, 15)
        C_H2 = nd.Activation(data=nd.Convolution(data=P_H1, weight=W2, bias=B2, kernel=(6, 6), stride=(1, 1), num_filter=30), act_type="relu")  # MNIST: (batch size, 30, 8, 8), CIFAR10: (batch size, 30, 10, 10)
        P_H2 = nd.Pooling(data=C_H2, pool_type="max", kernel=(2, 2), stride=(2, 2))  # MNIST: (batch size, 30, 4, 4), CIFAR10: (batch size, 30, 5, 5)
        P_H2 = nd.flatten(data=P_H2)

        '''FullyConnected parameters
        • data: (batch_size, input_dim)
        • weight: (num_hidden, input_dim)
        • bias: (num_hidden,)
        • out: (batch_size, num_hidden)
        '''
        F_H1 = nd.Activation(nd.FullyConnected(data=P_H2, weight=W3, bias=B3, num_hidden=120), act_type="sigmoid")
        F_H1 = nd.Dropout(data=F_H1, p=drop_rate)
        F_H2 = nd.Activation(nd.FullyConnected(data=F_H1, weight=W4, bias=B4, num_hidden=64), act_type="sigmoid")
        F_H2 = nd.Dropout(data=F_H2, p=drop_rate)
        softmax_Y = nd.softmax(nd.FullyConnected(data=F_H2, weight=W5, bias=B5, num_hidden=10))
        return softmax_Y
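A hypothetical parameter setup for the MNIST case of the CNN above (shapes follow the per-layer comments inside network(); the initialization scale is an assumption):

from mxnet import nd

W1, B1 = nd.random.normal(scale=0.01, shape=(60, 1, 3, 3)), nd.zeros(60)
W2, B2 = nd.random.normal(scale=0.01, shape=(30, 60, 6, 6)), nd.zeros(30)
W3, B3 = nd.random.normal(scale=0.01, shape=(120, 30 * 4 * 4)), nd.zeros(120)
W4, B4 = nd.random.normal(scale=0.01, shape=(64, 120)), nd.zeros(64)
W5, B5 = nd.random.normal(scale=0.01, shape=(10, 64)), nd.zeros(10)

X = nd.random.uniform(shape=(8, 1, 28, 28))  # dummy MNIST batch
print(network(X, drop_rate=0.2).shape)       # (8, 10)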
Example #8
    def forward(self, sentences: List[Sentence], embed_ctx=None, dropout=None) -> Tuple[nd.NDArray, nd.NDArray, List]:
        """

        :param sentences:
        :return: features, tags, lengths
        """
        longest_token_sequence_in_batch = len(max(sentences, key=len))

        self.embeddings.embed(sentences, ctx=None if not embed_ctx else mx.cpu())

        all_sentence_tensors = []
        lengths = []
        tag_list = []

        padding = nd.zeros((1, self.embeddings.embedding_length), dtype='float32')

        for sentence in sentences:

            # get the tags in this sentence
            tag_idx = []

            lengths.append(len(sentence.tokens))

            word_embeddings = []

            for token in sentence:
                # get the tag
                tag_idx.append(self.tag_dictionary.get_idx_for_item(token.get_tag(self.tag_type)))
                # get the word embeddings
                embedding = token.get_embedding().reshape((1, -1))
                if embed_ctx:
                    embedding = embedding.as_in_context(embed_ctx)
                word_embeddings.append(embedding)

            # pad shorter sentences out
            for _ in range(longest_token_sequence_in_batch - len(sentence.tokens)):
                word_embeddings.append(padding)

            word_embeddings_tensor = nd.concat(*word_embeddings, dim=0)

            tag_list.append(nd.array(tag_idx))

            all_sentence_tensors.append(word_embeddings_tensor.expand_dims(1))

        # padded tensor for entire batch
        sentence_tensor = nd.concat(*all_sentence_tensors, dim=1)  # (T, N, C)

        # --------------------------------------------------------------------
        # FF PART
        # --------------------------------------------------------------------
        sentence_tensor = self.dropout(sentence_tensor)

        if self.relearn_embeddings:
            sentence_tensor = self.embedding2nn(sentence_tensor)

        if self.use_rnn:
            sentence_tensor = self.rnn(sentence_tensor)
            sentence_tensor = self.dropout(sentence_tensor)

        if dropout:
            sentence_tensor = nd.Dropout(sentence_tensor, dropout, mode='always')
        features = self.linear(sentence_tensor)
        tags = nd.zeros((len(tag_list), longest_token_sequence_in_batch), dtype='int32')
        for i, (t, l) in enumerate(zip(tag_list, lengths)):
            tags[i, :l] = t
        return features.transpose([1, 0, 2]), tags, lengths
Example #9
    def network(
        X,
        drop_rate=0.0
    ):  # formula: output_size = ((input - kernel + 2*padding) / stride) + 1
        # data size
        # MNIST, FashionMNIST = (batch size, 1, 28, 28)
        # CIFAR = (batch size, 3, 32, 32)

        # Note: the built-in BatchNorm's moving_mean/moving_var tracking does
        # not work here, hence use_global_stats=True below.
        C_H1 = nd.Activation(
            data=nd.BatchNorm(data=nd.Convolution(data=X,
                                                  weight=W1,
                                                  bias=B1,
                                                  kernel=(3, 3),
                                                  stride=(1, 1),
                                                  num_filter=60),
                              gamma=gamma1,
                              beta=beta1,
                              moving_mean=ma1,
                              moving_var=mv1,
                              momentum=0.9,
                              fix_gamma=False,
                              use_global_stats=True),
            act_type="relu"
        )  # MNIST: result = (batch size, 60, 26, 26), CIFAR10: result = (batch size, 60, 30, 30)
        P_H1 = nd.Pooling(
            data=C_H1, pool_type="avg", kernel=(2, 2), stride=(2, 2)
        )  # MNIST : result = (batch size , 60 , 13 , 13) , CIFAR10 : result = (batch size , 60 , 15 , 15)
        C_H2 = nd.Activation(
            data=nd.BatchNorm(data=nd.Convolution(data=P_H1,
                                                  weight=W2,
                                                  bias=B2,
                                                  kernel=(6, 6),
                                                  stride=(1, 1),
                                                  num_filter=30),
                              gamma=gamma2,
                              beta=beta2,
                              moving_mean=ma2,
                              moving_var=mv2,
                              momentum=0.9,
                              fix_gamma=False,
                              use_global_stats=True),
            act_type="relu"
        )  # MNIST :  result = ( batch size , 30 , 8 , 8), CIFAR10 :  result = ( batch size , 30 , 10 , 10)
        P_H2 = nd.Pooling(
            data=C_H2, pool_type="avg", kernel=(2, 2), stride=(2, 2)
        )  # MNIST : result = (batch size , 30 , 4 , 4) , CIFAR10 : result = (batch size , 30 , 5 , 5)
        P_H2 = nd.flatten(data=P_H2)
        '''FullyConnected parameter
        • data: (batch_size, input_dim)
        • weight: (num_hidden, input_dim)
        • bias: (num_hidden,)
        • out: (batch_size, num_hidden)
        '''
        F_H1 = nd.Activation(nd.BatchNorm(data=nd.FullyConnected(
            data=P_H2, weight=W3, bias=B3, num_hidden=120),
                                          gamma=gamma3,
                                          beta=beta3,
                                          moving_mean=ma3,
                                          moving_var=mv3,
                                          momentum=0.9,
                                          fix_gamma=False,
                                          use_global_stats=True),
                             act_type="relu")
        F_H1 = nd.Dropout(data=F_H1, p=drop_rate)
        F_H2 = nd.Activation(nd.BatchNorm(data=nd.FullyConnected(
            data=F_H1, weight=W4, bias=B4, num_hidden=64),
                                          gamma=gamma4,
                                          beta=beta4,
                                          moving_mean=ma4,
                                          moving_var=mv4,
                                          momentum=0.9,
                                          fix_gamma=False,
                                          use_global_stats=True),
                             act_type="relu")
        F_H2 = nd.Dropout(data=F_H2, p=drop_rate)
        # softmax is deliberately omitted; apply it in the loss (softmax cross-entropy) instead
        out = nd.FullyConnected(data=F_H2, weight=W5, bias=B5, num_hidden=10)
        return out
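Since this version of the network returns raw logits, a typical pairing (an assumption, not shown in the original) is gluon's SoftmaxCrossEntropyLoss, which applies softmax internally:

from mxnet import gluon

loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
# loss = loss_fn(network(X, drop_rate=0.2), labels)  # labels: (batch,) int class ids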