    def __init__(self, topology):
        self.descs = []
        self.descs.append(LayerDesc(EmbeddingPipe))

        # six transformer blocks; seg_method below splits stages at these layers
        for _ in range(6):
            self.descs.append(LayerDesc(TransformerNetPipe))

        # a plain callable is a valid pipeline stage: keep only the first
        # element of the tuple emitted by the previous stage
        self.descs.append(lambda x: x[0])

        super().__init__(layers=self.descs,
                         loss_fn=CriterionPipe(),
                         topology=topology,
                         seg_method="layer:TransformerNetPipe")
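These pipeline classes are all driven through fleet's hybrid-parallel setup. A minimal sketch of the surrounding boilerplate, assuming the class above is named TransformerPipe (a hypothetical name; the listing does not show it) and a 2-stage pipeline launched with python -m paddle.distributed.launch:

import paddle
import paddle.distributed.fleet as fleet

strategy = fleet.DistributedStrategy()
strategy.hybrid_configs = {"dp_degree": 1, "mp_degree": 1, "pp_degree": 2}
fleet.init(is_collective=True, strategy=strategy)

# the topology handed to __init__ comes from the hybrid communicate group
hcg = fleet.get_hybrid_communicate_group()
model = TransformerPipe(topology=hcg.topology())  # hypothetical class name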
    def __init__(self, topology):
        self.descs = []
        self.descs.append(LayerDesc(EmbeddingPipe))

        # two transformer blocks, each recomputed during backward
        for _ in range(2):
            self.descs.append(LayerDesc(TransformerNetPipe))

        super().__init__(layers=self.descs,
                         loss_fn=CriterionPipe(),
                         topology=topology,
                         seg_method="layer:TransformerNetPipe",
                         recompute_interval=1,       # checkpoint every segment
                         recompute_partition=False,  # don't shard checkpoints across mp ranks
                         recompute_offload=False)    # don't offload checkpoints to host memory
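The three recompute_* arguments are ordinary PipelineLayer constructor arguments, so a subclass can surface them to callers instead of hard-coding values. A hedged sketch of that pattern, reusing the layer names from the snippet above (RecomputePipe is a hypothetical name):

from paddle.distributed.fleet.meta_parallel import LayerDesc, PipelineLayer

class RecomputePipe(PipelineLayer):
    def __init__(self, topology, recompute_interval=1):
        descs = [LayerDesc(EmbeddingPipe)]
        descs += [LayerDesc(TransformerNetPipe) for _ in range(2)]
        super().__init__(layers=descs,
                         loss_fn=CriterionPipe(),
                         topology=topology,
                         seg_method="layer:TransformerNetPipe",
                         # 0 disables recompute; 1 checkpoints every segment
                         recompute_interval=recompute_interval)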
    def __init__(self, **kwargs):
        self.descs = []
        # first occurrence of the shared 'embed' layer; the SharedLayerDesc
        # with the same key below reuses its 'embedding_weight' parameter
        self.descs.append(
            SharedLayerDesc('embed',
                            EmbeddingPipe,
                            shared_weight_attr='embedding_weight'))
        self.descs.append(LayerDesc(MatmulNet))

        self.descs.append(LayerDesc(BiasNet))

        def _logits_helper(embedding, output):
            # project the hidden state back onto the tied embedding matrix
            return paddle.matmul(output[0], embedding.embedding_weight)

        self.descs.append(
            SharedLayerDesc('embed',
                            EmbeddingPipe,
                            forward_func=_logits_helper,
                            shared_weight_attr='embedding_weight'))

        super().__init__(layers=self.descs,
                         loss_fn=LossNet(),
                         **kwargs)
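The two SharedLayerDesc entries use the same key 'embed', which makes the first and last stage share one 'embedding_weight' parameter (its gradients are synchronized between the owning stages). That only works if EmbeddingPipe actually exposes the attribute named by shared_weight_attr; a minimal sketch of that contract, with hypothetical sizes:

import paddle.nn as nn

class EmbeddingPipe(nn.Layer):
    def __init__(self, vocab_size=1024, hidden_size=64):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, hidden_size)

    @property
    def embedding_weight(self):
        # the attribute SharedLayerDesc looks up via shared_weight_attr
        return self.embed.weight

    def forward(self, x):
        return self.embed(x)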
Example #4
    def __init__(self,
                 vocab_size,
                 hidden_size=768,
                 num_hidden_layers=12,
                 num_attention_heads=12,
                 intermediate_size=3072,
                 hidden_act="gelu",
                 hidden_dropout_prob=0.1,
                 attention_probs_dropout_prob=0.1,
                 max_position_embeddings=512,
                 type_vocab_size=16,
                 initializer_range=0.02,
                 pad_token_id=0,
                 eos_token_id=7,
                 bos_token_id=0,
                 eol_token_id=3,
                 num_partitions=1,
                 topology=None,
                 recompute_interval=0):

        # build the stage descriptions in forward order:
        # embedding -> decoder layers -> final LayerNorm -> tied logits head
        self.descs = []

        self.descs.append(
            SharedLayerDesc('embed',
                            EmbeddingPipe,
                            shared_weight_attr='embedding_weight',
                            vocab_size=vocab_size,
                            hidden_size=hidden_size,
                            hidden_dropout_prob=hidden_dropout_prob,
                            max_position_embeddings=max_position_embeddings,
                            type_vocab_size=type_vocab_size,
                            initializer_range=initializer_range))

        for _ in range(num_hidden_layers):
            self.descs.append(
                LayerDesc(TransformerDecoderLayer,
                          d_model=hidden_size,
                          nhead=num_attention_heads,
                          dim_feedforward=intermediate_size,
                          dropout=hidden_dropout_prob,
                          activation=hidden_act,
                          attn_dropout=attention_probs_dropout_prob,
                          act_dropout=hidden_dropout_prob,
                          weight_attr=paddle.ParamAttr(
                              initializer=nn.initializer.Normal(
                                  mean=0.0, std=initializer_range)),
                          bias_attr=None,
                          num_partitions=num_partitions))

        self.descs.append(LayerDesc(nn.LayerNorm,
                                    normalized_shape=hidden_size))

        def _logits_helper(embedding, output):
            # project onto the shared embedding to produce vocabulary logits
            return parallel_matmul(output, embedding.embedding_weight, True)

        self.descs.append(
            SharedLayerDesc('embed',
                            EmbeddingPipe,
                            forward_func=_logits_helper,
                            shared_weight_attr='embedding_weight',
                            vocab_size=vocab_size,
                            hidden_size=hidden_size,
                            hidden_dropout_prob=hidden_dropout_prob,
                            max_position_embeddings=max_position_embeddings,
                            type_vocab_size=type_vocab_size,
                            initializer_range=initializer_range))

        super().__init__(layers=self.descs,
                         loss_fn=GPTPretrainingCriterionPipe(),
                         topology=topology,
                         seg_method="layer:TransformerDecoderLayer",
                         recompute_interval=recompute_interval,
                         recompute_partition=False,
                         recompute_offload=False)
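Because each TransformerDecoderLayer is split across num_partitions tensor-parallel ranks, the fleet hybrid config has to agree with that value. A hedged instantiation sketch for 4 ranks (mp 2 x pp 2), assuming the class is named GPTForPretrainingPipe (hypothetical):

import paddle.distributed.fleet as fleet

strategy = fleet.DistributedStrategy()
strategy.hybrid_configs = {"dp_degree": 1, "mp_degree": 2, "pp_degree": 2}
fleet.init(is_collective=True, strategy=strategy)
hcg = fleet.get_hybrid_communicate_group()

model = GPTForPretrainingPipe(vocab_size=51200,   # hypothetical size
                              num_partitions=2,   # must equal mp_degree
                              topology=hcg.topology(),
                              recompute_interval=1)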
    def __init__(self, num_classes=10, **kwargs):
        self.num_classes = num_classes
        decs = [
            LayerDesc(nn.Conv2D, 1, 64, kernel_size=11, stride=4, padding=5),
            LayerDesc(nn.ReLU),
            LayerDesc(nn.MaxPool2D, kernel_size=2, stride=2),
            LayerDesc(nn.Conv2D, 64, 192, kernel_size=5, padding=2),
            F.relu,  # bare functions may be mixed in as pipeline stages
            LayerDesc(nn.MaxPool2D, kernel_size=2, stride=2),
            LayerDesc(nn.Conv2D, 192, 384, kernel_size=3, padding=1),
            F.relu,
            LayerDesc(nn.Conv2D, 384, 256, kernel_size=3, padding=1),
            F.relu,
            LayerDesc(nn.Conv2D, 256, 256, kernel_size=3, padding=1),
            F.relu,
            LayerDesc(nn.MaxPool2D, kernel_size=2, stride=2),
            LayerDesc(ReshapeHelp, shape=[-1, 256]),  # flatten for the head
            LayerDesc(nn.Linear, 256, self.num_classes),  # classifier
        ]
        super().__init__(layers=decs,
                         loss_fn=nn.CrossEntropyLoss(),
                         **kwargs)
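Once wrapped by fleet, a pipeline model is stepped with train_batch rather than a manual forward/backward. A sketch of the training loop, assuming the fleet setup sketched earlier and a train_loader yielding (image, label) batches; the first Conv2D above takes one channel, so MNIST-shaped input fits:

model = AlexNetPipeDesc(num_classes=10, topology=hcg.topology())
optimizer = paddle.optimizer.Adam(parameters=model.parameters())
model = fleet.distributed_model(model)
optimizer = fleet.distributed_optimizer(optimizer)

for images, labels in train_loader:  # train_loader assumed to exist
    # runs the pipeline schedule: micro-batched forward/backward plus the
    # optimizer step, returning the loss computed on the last stage
    loss = model.train_batch([images, labels], optimizer)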