Example #1
    def _to_zoo_input(self, input, is_constant=None):
        # Names present in self._initializer carry stored weights and become parameters.
        is_parameter = input.name in self._initializer
        if isinstance(input.zvalue, zautograd.Variable) or isinstance(
                input.zvalue, zautograd.Parameter):
            return input
        # Parameters and constants keep their full stored shape; plain graph
        # inputs drop the leading batch dimension.
        if isinstance(input.zvalue, np.ndarray):
            if is_parameter or is_constant:
                shape = input.zvalue.shape
            else:
                shape = input.zvalue.shape[1:]
        elif isinstance(input.zvalue, list):
            if is_parameter or is_constant:
                shape = input.zvalue
            else:
                shape = input.zvalue[1:]
        else:
            raise TypeError("unsupported type " + str(type(input.zvalue)))

        # Keep the raw value around, then wrap it as a frozen Parameter
        # (constant), a trainable Parameter, or an Input placeholder.
        input.data = input.zvalue
        if is_constant:
            input.zvalue = zautograd.Parameter(shape=shape,
                                               init_weight=input.zvalue,
                                               trainable=False)
        elif is_parameter:
            input.zvalue = zautograd.Parameter(
                shape=shape,
                init_weight=input.zvalue,
            )
        else:
            input.zvalue = zlayers.Input(shape=shape, name=input.name)
        return input
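A minimal sketch of the three wrapping outcomes above, assuming the zautograd
and zlayers aliases refer to zoo.pipeline.api.autograd and
zoo.pipeline.api.keras.layers; the calls mirror those in the method verbatim.

import numpy as np
import zoo.pipeline.api.autograd as zautograd
import zoo.pipeline.api.keras.layers as zlayers

value = np.random.rand(3, 2)

# Constant: full shape, weights frozen.
const = zautograd.Parameter(shape=value.shape, init_weight=value, trainable=False)
# Trainable parameter: full shape, weights updated during training.
weight = zautograd.Parameter(shape=value.shape, init_weight=value)
# Graph input: leading batch dimension dropped, only the per-sample shape is declared.
inp = zlayers.Input(shape=value.shape[1:], name="my_input")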
Example #2
    def test_parameter_create(self):
        w = auto.Parameter(shape=(3, 2))
        value = w.get_weight()
        w.set_weight(value)
        x = auto.Variable(input_shape=(3,))
        b = auto.Parameter(shape=(2,))
        # Contract x's last axis with w's first axis, then add the bias.
        out = auto.mm(x, w, axes=(1, 0)) + b
        model = Model(input=x, output=out)
        input_data = np.random.uniform(0, 1, (4, 3))
        model.forward(input_data)
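As a plain-NumPy shape check of the test above (no zoo dependency): auto.mm
with axes=(1, 0) presumably contracts the last axis of x with the first axis
of w, so a (4, 3) batch against a (3, 2) weight yields (4, 2), and the (2,)
bias broadcasts over the batch.

import numpy as np

x_val = np.random.uniform(0, 1, (4, 3))
w_val = np.random.uniform(0, 1, (3, 2))
b_val = np.zeros(2)

# Same contraction as auto.mm(x, w, axes=(1, 0)) + b in the model above.
out_val = np.tensordot(x_val, w_val, axes=(1, 0)) + b_val
assert out_val.shape == (4, 2)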
Example #3
    def block(self, x, size):
        # Scale (ones) and shift (zeros) parameters for the two layer
        # normalizations in this transformer block.
        g = auto.Parameter(shape=(1, size), init_weight=np.ones((1, size), dtype=self.bigdl_type))
        b = auto.Parameter(shape=(1, size), init_weight=np.zeros((1, size), dtype=self.bigdl_type))
        g2 = auto.Parameter(shape=(1, size), init_weight=np.ones((1, size), dtype=self.bigdl_type))
        b2 = auto.Parameter(shape=(1, size), init_weight=np.zeros((1, size), dtype=self.bigdl_type))

        a = self.multi_head_self_attention(x, size)
        n = self.layer_norm(x + a, w=g, b=b)
        m = self.mlp(n, size)
        h = self.layer_norm(n + m, w=g2, b=b2)
        return h
Example #4
    def block(self, x, size, attention_mask=None, epsilon=1e-5):
        g = auto.Parameter(shape=(1, size),
                           init_weight=np.ones((1, size),
                                               dtype=self.bigdl_type))
        b = auto.Parameter(shape=(1, size),
                           init_weight=np.zeros((1, size),
                                                dtype=self.bigdl_type))
        g2 = auto.Parameter(shape=(1, size),
                            init_weight=np.ones((1, size),
                                                dtype=self.bigdl_type))
        b2 = auto.Parameter(shape=(1, size),
                            init_weight=np.zeros((1, size),
                                                 dtype=self.bigdl_type))

        a = self.multi_head_self_attention(x, size, attention_mask)
        n = layer_norm(x + a, w=g, b=b, e=epsilon)
        m = self.mlp(n, size)
        h = layer_norm(n + m, w=g2, b=b2, e=epsilon)
        return h
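The layer_norm helper used in the two blocks above is not shown here. Below is
a plain-NumPy sketch of the arithmetic such a helper presumably performs, with
w and b standing in for the ones/zeros parameters g/b and e for the epsilon in
the block signature; it illustrates the math, not the project's implementation.

import numpy as np

def layer_norm_np(x, w, b, e=1e-5):
    # Normalize over the last (feature) axis, then apply scale and shift.
    u = x.mean(axis=-1, keepdims=True)
    s = x.var(axis=-1, keepdims=True)
    return (x - u) / np.sqrt(s + e) * w + b

x = np.random.randn(4, 8)   # (batch, size)
g = np.ones((1, 8))         # scale, like the ones init_weight above
b = np.zeros((1, 8))        # shift, like the zeros init_weight above
y = layer_norm_np(x, g, b)
assert y.shape == (4, 8)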
Example #5
    @classmethod
    def init(cls,
             vocab=40990,
             hidden_size=768,
             n_block=12,
             n_head=12,
             seq_len=512,
             intermediate_size=3072,
             hidden_drop=0.1,
             attn_drop=0.1,
             initializer_range=0.02,
             output_all_block=True,
             bigdl_type="float"):
        """
        vocab: vocabulary size of training data, default is 40990
        hidden_size: size of the encoder layers, default is 768
        n_block: block number, default is 12
        n_head: head number, default is 12
        seq_len: max sequence length of training data, default is 77
        intermediate_size: The size of the "intermediate" (i.e., feed-forward)
        hidden_drop: drop probability of full connected layers, default is 0.1
        attn_drop: drop probability of attention, default is 0.1
        initializer_ranger: weight initialization range, default is 0.02
        output_all_block: whether output all blocks' output, default is True
        """
        word_input = Input(shape=(seq_len, ))
        token_type_input = Input(shape=(seq_len, ))
        position_input = Input(shape=(seq_len, ))
        word_embedding = Embedding(vocab,
                                   hidden_size,
                                   input_length=seq_len,
                                   weights=np.random.normal(
                                       0.0, initializer_range,
                                       (vocab, hidden_size)))(word_input)
        position_embedding = Embedding(
            seq_len,
            hidden_size,
            input_length=seq_len,
            weights=np.random.normal(0.0, initializer_range,
                                     (seq_len, hidden_size)))(position_input)
        token_type_embedding = Embedding(
            2,
            hidden_size,
            input_length=seq_len,
            weights=np.random.normal(0.0, initializer_range,
                                     (2, hidden_size)))(token_type_input)
        # BERT embedding: sum of word, position and token-type embeddings,
        # followed by layer normalization and dropout.
        embedding = word_embedding + position_embedding + token_type_embedding

        w = auto.Parameter(shape=(1, hidden_size),
                           init_weight=np.ones((1, hidden_size),
                                               dtype=bigdl_type))
        b = auto.Parameter(shape=(1, hidden_size),
                           init_weight=np.zeros((1, hidden_size),
                                                dtype=bigdl_type))
        after_norm = layer_norm(embedding, w, b, 1e-12)
        h = Dropout(hidden_drop)(after_norm)

        embedding_layer = Model([word_input, token_type_input, position_input],
                                h)
        # Input shapes: word ids, token-type ids, position ids, attention mask.
        shape = ((seq_len, ), (seq_len, ), (seq_len, ), (1, 1, seq_len))

        return BERT(n_block,
                    n_head,
                    intermediate_size,
                    hidden_drop,
                    attn_drop,
                    initializer_range,
                    output_all_block,
                    embedding_layer,
                    input_shape=shape)
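The shape tuple at the end declares four per-sample inputs: word ids,
token-type ids, position ids and an attention mask of shape (1, 1, seq_len).
A minimal sketch of dummy NumPy inputs matching those shapes (batch size
chosen arbitrarily; how they are fed to the returned BERT model depends on
the zoo API and is not shown here):

import numpy as np

batch, seq_len, vocab = 2, 512, 40990

word_ids = np.random.randint(0, vocab, (batch, seq_len))
token_type_ids = np.zeros((batch, seq_len), dtype="int64")  # single segment
position_ids = np.tile(np.arange(seq_len), (batch, 1))      # 0 .. seq_len - 1
attention_mask = np.ones((batch, 1, 1, seq_len))            # nothing masked

for arr, shape in zip((word_ids, token_type_ids, position_ids, attention_mask),
                      ((seq_len,), (seq_len,), (seq_len,), (1, 1, seq_len))):
    assert arr.shape[1:] == shape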