Example #1
    def __init__(self, dim=300, K=65536, m=0.999, T=0.07, mlp=False):
        """
        dim: feature dimension (default: 300)
        K: queue size; number of negative keys (default: 65536)
        m: moco momentum of updating key encoder (default: 0.999)
        T: softmax temperature (default: 0.07)
        """
        super(MoCo, self).__init__()

        self.K = K
        self.m = m
        self.T = T

        # create the encoders
        self.encoder_q = ErnieModelForSequenceClassification.from_pretrained('ernie-2.0-large-en', num_labels=dim)
        self.encoder_k = ErnieModelForSequenceClassification.from_pretrained('ernie-2.0-large-en', num_labels=dim)

        if mlp:
            dim_mlp = 1024
            self.encoder_q.classifier = D.Sequential(D.Linear(dim_mlp, dim_mlp, act='relu'), self.encoder_q.classifier)
            self.encoder_k.classifier = D.Sequential(D.Linear(dim_mlp, dim_mlp, act='relu'), self.encoder_k.classifier)

        for param_q, param_k in zip(self.encoder_q.parameters(), self.encoder_k.parameters()):
            param_k.set_value(param_q.numpy())  # initialize key encoder with query encoder weights
            param_k.stop_gradient = True  # key encoder is not updated by gradient

        # create the queue
        self.queue = L.randn([dim, K])
        self.queue = norm(self.queue, dim=0)

        self.queue_ptr = L.zeros([1], dtype='int32')
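
The `m` hyperparameter above only matters together with the momentum update of the key encoder, which is not part of this snippet. Below is a minimal sketch of that update as an extra method on the same class, assuming the standard MoCo rule and Paddle dygraph parameter semantics (`set_value` / `numpy`); it is illustrative, not the original code.

    def _momentum_update_key_encoder(self):
        """Sketch: key encoder tracks an exponential moving average of the query encoder."""
        for param_q, param_k in zip(self.encoder_q.parameters(),
                                    self.encoder_k.parameters()):
            # param_k <- m * param_k + (1 - m) * param_q
            param_k.set_value(param_k.numpy() * self.m +
                              param_q.numpy() * (1.0 - self.m))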
Example #2
    def __init__(self,
            num_class,
            vocab_size,
            emb_dim=128,
            gru_dim=256,
            fc_hid_dim=256,
            is_sparse=True,
            bi_direction=True,
            ):
        super(GRU, self).__init__()

        self.bi_direction = bi_direction

        self.embedding = D.Embedding(
            size=[vocab_size, emb_dim],
            dtype='float32',
            #param_attr=F.ParamAttr(learning_rate=30),
            is_sparse=is_sparse)

        self._hid_fc1 = D.Linear(input_dim=emb_dim, output_dim=gru_dim * 3)

        self._gru_forward = DynamicGRU(size=gru_dim, h_0=None, is_reverse=False)

        if bi_direction:
            self._gru_backward = DynamicGRU(size=gru_dim, h_0=None, is_reverse=True)
            self._hid_fc2 = D.Linear(input_dim=gru_dim * 2, output_dim=fc_hid_dim, act="tanh")
        else:
            self._hid_fc2 = D.Linear(input_dim=gru_dim, output_dim=fc_hid_dim, act="tanh")

        self._output_fc = D.Linear(input_dim=fc_hid_dim, output_dim=num_class, act=None)
Example #3
    def __init__(self, input_size, hidden_size, output_size, dropout_rate=0.2):
        """Prenet before passing through the network.

        Args:
            input_size (int): the input channel size.
            hidden_size (int): the size of hidden layer in network.
            output_size (int): the output channel size.
            dropout_rate (float, optional): dropout probability. Defaults to 0.2.
        """
        super(PreNet, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_rate = dropout_rate

        k = math.sqrt(1.0 / input_size)
        self.linear1 = dg.Linear(
            input_size,
            hidden_size,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.XavierInitializer()),
            bias_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(low=-k, high=k)))
        k = math.sqrt(1.0 / hidden_size)
        self.linear2 = dg.Linear(
            hidden_size,
            output_size,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.XavierInitializer()),
            bias_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(low=-k, high=k)))
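
The forward pass is not shown in this snippet. A hedged sketch of the usual prenet forward (two linear + ReLU layers, each followed by dropout), assuming `fluid.layers` is available as `layers`:

    def forward(self, x):
        # linear1 -> relu -> dropout -> linear2 -> relu -> dropout (sketch only)
        x = layers.dropout(layers.relu(self.linear1(x)), self.dropout_rate)
        x = layers.dropout(layers.relu(self.linear2(x)), self.dropout_rate)
        return x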
Example #4
    def __init__(self,
                 in_channels,
                 reduction_factor,
                 prenet_sizes,
                 layers,
                 kernel_size,
                 attention_dim,
                 position_encoding_weight=1.,
                 omega=1.,
                 has_bias=False,
                 bias_dim=0,
                 keep_prob=1.):
        super(Decoder, self).__init__()
        # prenet: note the difference between AffineBlock2 and AffineBlock1
        c_in = in_channels
        self.prenet = dg.LayerList()
        for i, c_out in enumerate(prenet_sizes):
            affine = AffineBlock2(c_in,
                                  c_out,
                                  has_bias,
                                  bias_dim,
                                  dropout=(i != 0),
                                  keep_prob=keep_prob)
            self.prenet.append(affine)
            c_in = c_out

        # causal convolutions + multihop attention
        decoder_dim = prenet_sizes[-1]
        self.causal_convs = dg.LayerList()
        self.attention_blocks = dg.LayerList()
        for i in range(layers):
            conv = ConvBlock(decoder_dim, kernel_size, True, has_bias,
                             bias_dim, keep_prob)
            attn = AttentionBlock(attention_dim, decoder_dim,
                                  position_encoding_weight, omega,
                                  reduction_factor, has_bias, bias_dim,
                                  keep_prob)
            self.causal_convs.append(conv)
            self.attention_blocks.append(attn)

        # output mel spectrogram
        output_dim = reduction_factor * in_channels  # r * mel_dim
        std = np.sqrt(1.0 / decoder_dim)
        initializer = I.NormalInitializer(loc=0., scale=std)
        out_affine = dg.Linear(decoder_dim, output_dim, param_attr=initializer)
        self.out_affine = weight_norm(out_affine, dim=-1)
        if has_bias:
            self.out_sp_affine = dg.Linear(bias_dim, output_dim)

        self.has_bias = has_bias
        self.kernel_size = kernel_size

        self.in_channels = in_channels
        self.decoder_dim = decoder_dim
        self.reduction_factor = reduction_factor
        self.out_channels = output_dim
Example #5
 def __init__(self, num_features, num_classes, epsilon=1e-5, momentum=0.1):
     super().__init__()
     self.bn_in_cond = BatchNorm(num_features,
                                 affine=False,
                                 epsilon=epsilon,
                                 momentum=momentum)
     self.gamma_embed = SpectralNorm(
         dg.Linear(num_classes, num_features, bias_attr=False))
     self.beta_embed = SpectralNorm(
         dg.Linear(num_classes, num_features, bias_attr=False))
Example #6
    def __init__(self, in_channel, out_channel, has_bias=False, bias_dim=0):
        super(AffineBlock1, self).__init__()
        std = np.sqrt(1.0 / in_channel)
        initializer = I.NormalInitializer(loc=0., scale=std)
        affine = dg.Linear(in_channel, out_channel, param_attr=initializer)
        self.affine = weight_norm(affine, dim=-1)
        if has_bias:
            self.bias_affine = dg.Linear(bias_dim, out_channel)

        self.has_bias = has_bias
        self.bias_dim = bias_dim
Example #7
File: TSN1.py  Project: suize/eco_paddle
 def __init__(self, name=None, num=None):
     super(TSNResNet, self).__init__()
     self.convbn = convbn(3, 16)
     self.convpools = dygraph.Sequential(convpool(16, 32, pooling=4),
                                         convpool(32, 64, pooling=4),
                                         convpool(64, 128))
     self.fcs = dygraph.Sequential(
         dygraph.Linear(7 * 7 * 128, 1024, act='relu'),
         dygraph.BatchNorm(1024), dygraph.Dropout(0.5),
         dygraph.Linear(1024, 101, act='softmax'))
     self.seg_num = 32
Example #8
    def __init__(self, mlp_head_dim, num_classes, num_bbox_reg_classes,
                 roi_size, roi_spatial_scale, roi_sampling_ratio):
        super().__init__()
        in_dim = 256  # FPN output dimension
        self.mlp_head_dim = mlp_head_dim
        self.roi_size = roi_size
        self.roi_spatial_scale = roi_spatial_scale
        self.roi_sampling_ratio = roi_sampling_ratio

        self.fc6 = dg.Linear(in_dim * roi_size * roi_size, mlp_head_dim)
        self.fc7 = dg.Linear(mlp_head_dim, mlp_head_dim)
        self.cls_score = dg.Linear(mlp_head_dim, num_classes)
        self.bbox_pred = dg.Linear(mlp_head_dim, num_bbox_reg_classes * 4)
Example #9
    def __init__(self, in_channel, out_channel,
                 has_bias=False, bias_dim=0, dropout=False, keep_prob=1.):
        super(AffineBlock2, self).__init__()
        if has_bias:
            std = np.sqrt(1 / bias_dim)
            self.bias_affine = dg.Linear(bias_dim, in_channel, param_attr=I.Normal(scale=std))
        std = np.sqrt(1.0 / in_channel)
        affine = dg.Linear(in_channel, out_channel, param_attr=I.Normal(scale=std))
        self.affine = weight_norm(affine, dim=-1)

        self.has_bias = has_bias
        self.bias_dim = bias_dim
        self.dropout = dropout
        self.keep_prob = keep_prob
Example #10
    def __init__(self):
        super(MNIST, self).__init__()

        self.cnn = dy.Conv2D(num_channels=3,
                             num_filters=1,
                             filter_size=3,
                             stride=1,
                             padding=1,
                             act='relu')

        self.cls = dy.Sequential(
            dy.Linear(input_dim=784, output_dim=128),
            dy.Dropout(p=.2),
            dy.Linear(input_dim=128, output_dim=5),
        )
Example #11
    def __init__(self,
                 backbone,
                 transformer,
                 num_classes,
                 num_queries,
                 aux_loss=False):
        """
        Initializes the model.

        Parameters:
            backbone: See backbone.py
            transformer: See transformer.py
            num_classes: number of object classes
            num_queries: number of object queries, i.e., the detection slots. This is the maximal number of objects
                         DETR can detect in a single image. For COCO, we recommend 100 queries.
            aux_loss: True if auxiliary decoding losses (loss at each decoder layer) are to be used.
        """
        super().__init__()
        self.num_queries = num_queries
        self.transformer = transformer
        hidden_dim = transformer.d_model
        self.class_embed = dg.Linear(hidden_dim, num_classes + 1)
        self.bbox_embed = MLP(hidden_dim, hidden_dim, 4, 3)
        self.query_embed = dg.Embedding((num_queries, hidden_dim))
        self.input_proj = dg.Conv2D(backbone.num_channels,
                                    hidden_dim,
                                    filter_size=1)
        self.backbone = backbone
        self.aux_loss = aux_loss
Example #12
    def __init__(self,
                 code_dim=128,
                 n_class=1000,
                 chn=96,
                 blocks_with_attention="B4",
                 resolution=512):
        super().__init__()

        def GBlock(in_channel, out_channel, n_class, z_dim, use_attention):
            return ResBlock(in_channel,
                            out_channel,
                            n_class=n_class,
                            z_dim=z_dim,
                            use_attention=use_attention)

        self.embed_y = dg.Linear(n_class, 128, bias_attr=False)

        self.chn = chn
        self.resolution = resolution
        self.blocks_with_attention = set(blocks_with_attention.split(","))
        self.blocks_with_attention.discard('')

        gblock = []
        in_channels, out_channels = self.get_in_out_channels()
        self.num_split = len(in_channels) + 1

        z_dim = code_dim // self.num_split + 128
        self.noise_fc = SpectralNorm(
            dg.Linear(code_dim // self.num_split, 4 * 4 * in_channels[0]))

        self.sa_ids = [
            int(s.split('B')[-1]) for s in self.blocks_with_attention
        ]

        for i, (nc_in, nc_out) in enumerate(zip(in_channels, out_channels)):
            gblock.append(
                GBlock(nc_in,
                       nc_out,
                       n_class=n_class,
                       z_dim=z_dim,
                       use_attention=(i + 1) in self.sa_ids))
        self.blocks = dg.LayerList(gblock)

        self.output_layer_bn = BatchNorm(1 * chn, epsilon=1e-5)
        self.output_layer_conv = SpectralNorm(
            dg.Conv2D(1 * chn, 3, [3, 3], padding=1))
Example #13
def _build_linear(n_in, n_out, name, init, act=None):
    return D.Linear(n_in,
                    n_out,
                    param_attr=F.ParamAttr(name='%s.w_0' %
                                           name if name is not None else None,
                                           initializer=init),
                    bias_attr='%s.b_0' % name if name is not None else None,
                    act=act)
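
A hedged usage sketch for the helper above (the initializer and shapes are placeholders; `F` is `paddle.fluid`, as in the snippet):

    import paddle.fluid as F

    with F.dygraph.guard():
        init = F.initializer.TruncatedNormal(scale=0.02)
        cls_fc = _build_linear(768, 2, name='cls_out', init=init, act=None)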
Example #14
    def __init__(self, cfg, num_mels=80):
        """FastSpeech model.

        Args:
            cfg: the yaml configs used in FastSpeech model.
            num_mels (int, optional): the number of mel bands when calculating mel spectrograms. Defaults to 80.

        """
        super(FastSpeech, self).__init__()

        self.encoder = Encoder(
            n_src_vocab=len(symbols) + 1,
            len_max_seq=cfg['max_seq_len'],
            n_layers=cfg['encoder_n_layer'],
            n_head=cfg['encoder_head'],
            d_k=cfg['hidden_size'] // cfg['encoder_head'],
            d_q=cfg['hidden_size'] // cfg['encoder_head'],
            d_model=cfg['hidden_size'],
            d_inner=cfg['encoder_conv1d_filter_size'],
            fft_conv1d_kernel=cfg['fft_conv1d_filter'],
            fft_conv1d_padding=cfg['fft_conv1d_padding'],
            dropout=0.1)
        self.length_regulator = LengthRegulator(
            input_size=cfg['hidden_size'],
            out_channels=cfg['duration_predictor_output_size'],
            filter_size=cfg['duration_predictor_filter_size'],
            dropout=cfg['dropout'])
        self.decoder = Decoder(
            len_max_seq=cfg['max_seq_len'],
            n_layers=cfg['decoder_n_layer'],
            n_head=cfg['decoder_head'],
            d_k=cfg['hidden_size'] // cfg['decoder_head'],
            d_q=cfg['hidden_size'] // cfg['decoder_head'],
            d_model=cfg['hidden_size'],
            d_inner=cfg['decoder_conv1d_filter_size'],
            fft_conv1d_kernel=cfg['fft_conv1d_filter'],
            fft_conv1d_padding=cfg['fft_conv1d_padding'],
            dropout=0.1)
        self.weight = fluid.ParamAttr(
            initializer=fluid.initializer.XavierInitializer())
        k = math.sqrt(1.0 / cfg['hidden_size'])
        self.bias = fluid.ParamAttr(initializer=fluid.initializer.Uniform(
            low=-k, high=k))
        self.mel_linear = dg.Linear(
            cfg['hidden_size'],
            num_mels * cfg['outputs_per_step'],
            param_attr=self.weight,
            bias_attr=self.bias, )
        self.postnet = PostConvNet(
            n_mels=num_mels,
            num_hidden=512,
            filter_size=5,
            padding=int(5 / 2),
            num_conv=5,
            outputs_per_step=cfg['outputs_per_step'],
            use_cudnn=True,
            dropout=0.1,
            batchnorm_last=True)
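
Every hyperparameter above comes from `cfg`. A hedged sketch of such a dict with the keys this constructor reads (the values are placeholders, not taken from the original configuration):

    cfg = {
        'max_seq_len': 2048,
        'hidden_size': 384,
        'encoder_n_layer': 6, 'encoder_head': 2, 'encoder_conv1d_filter_size': 1536,
        'decoder_n_layer': 6, 'decoder_head': 2, 'decoder_conv1d_filter_size': 1536,
        'fft_conv1d_filter': 3, 'fft_conv1d_padding': 1,
        'duration_predictor_output_size': 256, 'duration_predictor_filter_size': 3,
        'dropout': 0.1,
        'outputs_per_step': 1,
    }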
Example #15
    def __init__(self,
            num_class,
            vocab_size,
            emb_dim=32,
            num_filters=10,
            fc_hid_dim=32,
            num_channels=1,
            win_size_list=None,
            is_sparse=True,
            use_cudnn=True,
            ):
        super(TextCNN, self).__init__()

        self.embedding = D.Embedding(
            size=[vocab_size, emb_dim],
            dtype='float32',
            is_sparse=is_sparse)

        logging.info("num_class    = {}".format(num_class))
        logging.info("vocab size   = {}".format(vocab_size))
        logging.info("emb_dim      = {}".format(emb_dim))
        logging.info("num filters  = {}".format(num_filters))
        logging.info("fc_hid_dim   = {}".format(fc_hid_dim))
        logging.info("num channels = {}".format(num_channels))
        logging.info("windows size = {}".format(win_size_list))
        logging.info("is sparse    = {}".format(is_sparse))
        logging.info("use cudnn    = {}".format(use_cudnn))

        win_size_list = [3] if win_size_list is None else win_size_list
        def gen_conv_pool(win_size):
            """生成指定窗口的卷积池化层
            """
            return ConvPool(
                    num_channels,
                    num_filters,
                    [win_size, emb_dim],
                    padding=[1, 0],
                    use_cudnn=use_cudnn,
                    )

        self.conv_pool_list = D.LayerList([gen_conv_pool(win_size) for win_size in win_size_list])

        self._hid_fc = D.Linear(input_dim=num_filters * len(win_size_list), output_dim=fc_hid_dim, act="tanh")
        self._output_fc = D.Linear(input_dim=fc_hid_dim, output_dim=num_class, act=None)
Example #16
def Linear(input_dim,
           output_dim,
           param_attr=None,
           bias_attr=None,
           act=None,
           dtype="float32"):
    # A Linear layer with weight normalization applied.
    lin = dg.Linear(input_dim, output_dim, param_attr, bias_attr, act, dtype)
    lin = WeightNormWrapper(lin, dim=1)
    return lin
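
A hedged usage sketch for the wrapper above (WeightNormWrapper is assumed to be defined in the same module; the sizes are placeholders):

    with dg.guard():
        proj = Linear(256, 80)  # a weight-normalized 256 -> 80 projection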
Example #17
 def __init__(self, layers, in_channels, postnet_dim, kernel_size, out_channels,
              upsample_factor, has_bias=False, bias_dim=0, keep_prob=1.):
     super(PostNet, self).__init__()
     self.pre_affine = AffineBlock1(in_channels, postnet_dim, has_bias, bias_dim)
     self.convs = dg.LayerList([
         ConvBlock(postnet_dim, kernel_size, False, has_bias, bias_dim, keep_prob) for _ in range(layers)
     ])
     std = np.sqrt(1.0 / postnet_dim)
     post_affine = dg.Linear(postnet_dim, out_channels, param_attr=I.Normal(scale=std))
     self.post_affine = weight_norm(post_affine, dim=-1)
     self.upsample_factor = upsample_factor
Example #18
    def __init__(self,
                 n_class=1000,
                 chn=96,
                 blocks_with_attention="B2",
                 resolution=256):
        super().__init__()

        def DBlock(in_channel,
                   out_channel,
                   downsample=True,
                   use_attention=False,
                   skip_proj=None):
            return ResBlock(in_channel,
                            out_channel,
                            conditional=False,
                            upsample=False,
                            downsample=downsample,
                            use_attention=use_attention,
                            skip_proj=skip_proj)

        self.chn = chn
        self.colors = 3
        self.resolution = resolution
        self.blocks_with_attention = set(blocks_with_attention.split(","))
        self.blocks_with_attention.discard('')

        dblock = []
        in_channels, out_channels = self.get_in_out_channels()

        self.sa_ids = [
            int(s.split('B')[-1]) for s in self.blocks_with_attention
        ]

        for i, (nc_in,
                nc_out) in enumerate(zip(in_channels[:-1], out_channels[:-1])):
            dblock.append(
                DBlock(nc_in,
                       nc_out,
                       downsample=True,
                       use_attention=(i + 1) in self.sa_ids,
                       skip_proj=nc_in == nc_out))
        dblock.append(
            DBlock(in_channels[-1],
                   out_channels[-1],
                   downsample=False,
                   use_attention=len(out_channels) in self.sa_ids,
                   skip_proj=in_channels[-1] == out_channels[-1]))
        self.blocks = dg.LayerList(dblock)

        self.final_fc = SpectralNorm(dg.Linear(16 * chn, 1))

        self.embed_y = dg.Embedding(size=[n_class, 16 * chn],
                                    is_sparse=False,
                                    param_attr=Uniform(-0.1, 0.1))
        self.embed_y = SpectralNorm(self.embed_y)
Example #19
    def __init__(self,
            num_class,
            vocab_size,
            emb_dim=128,
            lstm_dim=256,
            fc_hid_dim=256,
            is_sparse=True,
            bi_direction=True,
            dropout_prob=0.1,
            ):
        super(DynamicLSTMClassifier, self).__init__()

        logging.info("num_class    = {}".format(num_class))
        logging.info("vocab_size   = {}".format(vocab_size))
        logging.info("emb_dim      = {}".format(emb_dim))
        logging.info("lstm_dim      = {}".format(lstm_dim))
        logging.info("fc_hid_dim   = {}".format(fc_hid_dim))
        logging.info("is_sparse    = {}".format(is_sparse))
        logging.info("bi_direction = {}".format(bi_direction))
        logging.info("dropout_prob = {}".format(dropout_prob))

        self.bi_direction = bi_direction

        self.embedding = EmbeddingLayer(
            vocab_size=vocab_size,
            emb_dim=emb_dim,
            dtype='float32',
            is_sparse=is_sparse)

        self._lstm_forward = DynamicLSTMLayer(input_size=emb_dim, hidden_size=lstm_dim, is_reverse=False)

        if bi_direction:
            self._lstm_backward = DynamicLSTMLayer(input_size=emb_dim, hidden_size=lstm_dim, is_reverse=True)
            self._hid_fc2 = D.Linear(input_dim=lstm_dim * 2, output_dim=fc_hid_dim, act="tanh")
        else:
            self._hid_fc2 = D.Linear(input_dim=lstm_dim, output_dim=fc_hid_dim, act="tanh")

        self._output_fc = D.Linear(input_dim=fc_hid_dim, output_dim=num_class, act=None)

        self.dropout = lambda i: L.dropout(i,
                dropout_prob=dropout_prob,
                dropout_implementation="upscale_in_train") if self.training else i
Example #20
    def __init__(self, n_in, n_out, dropout=0):
        super(MLP, self).__init__()

        self.n_in = n_in
        self.n_out = n_out
        self.linear = dygraph.Linear(
            n_in,
            n_out,
            param_attr=initializer.Xavier(uniform=False),
            bias_attr=None,
        )
        self.dropout = SharedDropout(p=dropout)
Example #21
    def __init__(self,
            num_class,
            vocab_size,
            emb_dim=32,
            num_filters=10,
            fc_hid_dim=32,
            num_channels=1,
            win_size_list=None,
            is_sparse=True,
            use_cudnn=True,
            ):
        super(TextCNNClassifier, self).__init__()

        self.embedding = EmbeddingLayer(
            vocab_size=vocab_size,
            emb_dim=emb_dim,
            dtype='float32',
            is_sparse=is_sparse,
            )

        self.textcnn = TextCNNLayer(
            emb_dim,
            num_filters,
            num_channels,
            win_size_list,
            use_cudnn,
            )

        logging.info("num_class     = {}".format(num_class))
        logging.info("vocab size    = {}".format(vocab_size))
        logging.info("emb_dim       = {}".format(emb_dim))
        logging.info("num filters   = {}".format(num_filters))
        logging.info("fc_hid_dim    = {}".format(fc_hid_dim))
        logging.info("num channels  = {}".format(num_channels))
        logging.info("win size list = {}".format(win_size_list))
        logging.info("is sparse     = {}".format(is_sparse))
        logging.info("use cudnn     = {}".format(use_cudnn))

        self._hid_fc = D.Linear(input_dim=num_filters * len(win_size_list), output_dim=fc_hid_dim, act="tanh")
        self._output_fc = D.Linear(input_dim=fc_hid_dim, output_dim=num_class, act=None)
Example #22
    def __init__(self,
            num_class,
            vocab_size,
            emb_dim=128,
            gru_dim=256,
            fc_hid_dim=256,
            is_sparse=True,
            bi_direction=True,
            ):
        super(GRUClassifier, self).__init__()

        logging.info("num_class    = {}".format(num_class))
        logging.info("vocab_size   = {}".format(vocab_size))
        logging.info("emb_dim      = {}".format(emb_dim))
        logging.info("gru_dim      = {}".format(gru_dim))
        logging.info("fc_hid_dim   = {}".format(fc_hid_dim))
        logging.info("is_sparse    = {}".format(is_sparse))
        logging.info("bi_direction = {}".format(bi_direction))

        self.bi_direction = bi_direction

        self.embedding = EmbeddingLayer(
            vocab_size=vocab_size,
            emb_dim=emb_dim,
            dtype='float32',
            #param_attr=F.ParamAttr(learning_rate=30),
            is_sparse=is_sparse)

        self._hid_fc1 = D.Linear(input_dim=emb_dim, output_dim=gru_dim * 3)

        self._gru_forward = DynamicGRULayer(size=gru_dim, h_0=None, is_reverse=False)

        if bi_direction:
            self._gru_backward = DynamicGRULayer(size=gru_dim, h_0=None, is_reverse=True)
            self._hid_fc2 = D.Linear(input_dim=gru_dim * 2, output_dim=fc_hid_dim, act="tanh")
        else:
            self._hid_fc2 = D.Linear(input_dim=gru_dim, output_dim=fc_hid_dim, act="tanh")

        self._output_fc = D.Linear(input_dim=fc_hid_dim, output_dim=num_class, act=None)
Example #23
    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None):
        super(ResNet, self).__init__()
        if norm_layer is None:
            norm_layer = dg.BatchNorm
        self._norm_layer = norm_layer

        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        
        self.groups = groups
        self.base_width = width_per_group
        self.conv1 = dg.Conv2D(3, self.inplanes, filter_size=7, stride=2, 
                               padding=3, bias_attr=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = ReLU()
        self.maxpool = dg.Pool2D(pool_size=3, pool_type='max', pool_stride=2, pool_padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                       dilate=replace_stride_with_dilation[2])
        self.avgpool = lambda x: L.adaptive_pool2d(x, (1, 1), pool_type='avg')
        self.fc = dg.Linear(512 * block.expansion, num_classes)

        for m in self.sublayers():
            if isinstance(m, dg.Conv2D):
                m.param_attr = F.ParamAttr(initializer=F.initializer.MSRAInitializer())
            elif isinstance(m, (dg.BatchNorm, dg.GroupNorm)):
                m.param_attr = F.ParamAttr(initializer=F.initializer.ConstantInitializer(value=0.0))
                m.bias_attr = F.ParamAttr(initializer=F.initializer.ConstantInitializer(value=0.0))
        
        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.sublayers():
                if isinstance(m, Bottleneck):
                    m.bn3.param_attr = F.ParamAttr(initializer=F.initializer.ConstantInitializer(value=0.0))
                elif isinstance(m, BasicBlock):
                    m.bn2.param_attr = F.ParamAttr(initializer=F.initializer.ConstantInitializer(value=0.0))
Example #24
    def __init__(self, input_size, out_channels, filter_size, dropout=0.1):
        """Duration Predictor block in FastSpeech.

        Args:
            input_size (int): the channel number of input.
            out_channels (int): the output channel number.
            filter_size (int): the filter size.
            dropout (float, optional): dropout probability. Defaults to 0.1.
        """
        super(DurationPredictor, self).__init__()
        self.input_size = input_size
        self.out_channels = out_channels
        self.filter_size = filter_size
        self.dropout = dropout

        k = math.sqrt(1.0 / self.input_size)
        self.conv1 = Conv1D(
            num_channels=self.input_size,
            num_filters=self.out_channels,
            filter_size=self.filter_size,
            padding=1,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.XavierInitializer()),
            bias_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(low=-k, high=k)))
        #data_format='NTC')
        k = math.sqrt(1.0 / self.out_channels)
        self.conv2 = Conv1D(
            num_channels=self.out_channels,
            num_filters=self.out_channels,
            filter_size=self.filter_size,
            padding=1,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.XavierInitializer()),
            bias_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(low=-k, high=k)))
        #data_format='NTC')
        self.layer_norm1 = dg.LayerNorm(self.out_channels)
        self.layer_norm2 = dg.LayerNorm(self.out_channels)

        self.weight = fluid.ParamAttr(
            initializer=fluid.initializer.XavierInitializer())
        k = math.sqrt(1.0 / self.out_channels)
        self.bias = fluid.ParamAttr(
            initializer=fluid.initializer.Uniform(low=-k, high=k))

        self.linear = dg.Linear(self.out_channels,
                                1,
                                param_attr=self.weight,
                                bias_attr=self.bias)
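
A hedged construction sketch (the sizes are placeholders; with filter_size=3 and padding=1 the two convolutions above keep the sequence length unchanged):

    with dg.guard():
        duration_predictor = DurationPredictor(input_size=256, out_channels=256,
                                               filter_size=3, dropout=0.1)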
Example #25
    def __init__(self, num_units, num_layers=4):
        """Highway network

        Args:
            num_units (int): dimension of hidden unit.
            num_layers (int, optional): number of highway layers. Defaults to 4.
        """
        super(Highwaynet, self).__init__()
        self.num_units = num_units
        self.num_layers = num_layers

        self.gates = []
        self.linears = []
        k = math.sqrt(1.0 / num_units)
        for i in range(num_layers):
            self.linears.append(
                dg.Linear(
                    num_units,
                    num_units,
                    param_attr=fluid.ParamAttr(
                        initializer=fluid.initializer.XavierInitializer()),
                    bias_attr=fluid.ParamAttr(
                        initializer=fluid.initializer.Uniform(
                            low=-k, high=k))))
            self.gates.append(
                dg.Linear(
                    num_units,
                    num_units,
                    param_attr=fluid.ParamAttr(
                        initializer=fluid.initializer.XavierInitializer()),
                    bias_attr=fluid.ParamAttr(
                        initializer=fluid.initializer.Uniform(
                            low=-k, high=k))))

        for i, (linear, gate) in enumerate(zip(self.linears, self.gates)):
            self.add_sublayer("linears_{}".format(i), linear)
            self.add_sublayer("gates_{}".format(i), gate)
Example #26
    def __init__(self):
        super(HarFcn, self).__init__()

        self.cnn1 = dy.Sequential(
            dy.Conv2D(num_channels=1,
                      num_filters=128,
                      filter_size=3,
                      stride=1,
                      padding=1),
            dy.BatchNorm(num_channels=128),
            dy.Dropout(p=.2),
        )
        self.cnn2 = dy.Sequential(
            dy.Conv2D(num_channels=128,
                      num_filters=128,
                      filter_size=3,
                      stride=1,
                      padding=1),
            dy.BatchNorm(num_channels=128),
            dy.Dropout(p=.2),
        )
        self.cnn3 = dy.Sequential(
            dy.Conv2D(num_channels=128,
                      num_filters=128,
                      filter_size=3,
                      stride=1,
                      padding=1),
            dy.BatchNorm(num_channels=128),
            dy.Dropout(p=.2),
        )

        self.cls = dy.Sequential(
            dy.Linear(input_dim=384, output_dim=128),
            dy.Dropout(p=.2),
            dy.Linear(input_dim=128, output_dim=5),
        )
Example #27
    def __init__(self, in_channel, kernel_size, causal=False, has_bias=False, 
                 bias_dim=None, keep_prob=1.):
        super(ConvBlock, self).__init__()
        self.causal = causal
        self.keep_prob = keep_prob
        self.in_channel = in_channel
        self.has_bias = has_bias

        std = np.sqrt(4 * keep_prob / (kernel_size * in_channel))
        padding = "valid" if causal else "same"
        conv = Conv1D(in_channel, 2 * in_channel, (kernel_size, ),
                       padding=padding, 
                       data_format="NTC",
                       param_attr=I.Normal(scale=std))
        self.conv = weight_norm(conv)
        if has_bias:
            std = np.sqrt(1 / bias_dim)
            self.bias_affine = dg.Linear(bias_dim, 2 * in_channel, param_attr=I.Normal(scale=std))
Example #28
    def __init__(self,
                 attention_dim,
                 input_dim,
                 position_encoding_weight=1.,
                 position_rate=1.,
                 reduction_factor=1,
                 has_bias=False,
                 bias_dim=0,
                 keep_prob=1.):
        super(AttentionBlock, self).__init__()
        # positional encoding
        omega_default = position_rate / reduction_factor
        self.omega_default = omega_default
        # multispeaker case
        if has_bias:
            std = np.sqrt(1.0 / bias_dim)
            initializer = I.NormalInitializer(loc=0., scale=std)
            self.q_pos_affine = dg.Linear(bias_dim, 1, param_attr=initializer)
            self.k_pos_affine = dg.Linear(bias_dim, 1, param_attr=initializer)
            self.omega_initial = self.create_parameter(
                shape=[1], attr=I.ConstantInitializer(value=omega_default))

        # mind the fact that q, k, v have the same feature dimension
        # so we can init k_affine and q_affine's weight as the same matrix
        # to get a better init attention
        init_weight = np.random.normal(size=(input_dim, attention_dim),
                                       scale=np.sqrt(1. / input_dim))
        initializer = I.NumpyArrayInitializer(init_weight.astype(np.float32))
        # 3 affine transformation to project q, k, v into attention_dim
        q_affine = dg.Linear(input_dim, attention_dim, param_attr=initializer)
        self.q_affine = weight_norm(q_affine, dim=-1)
        k_affine = dg.Linear(input_dim, attention_dim, param_attr=initializer)
        self.k_affine = weight_norm(k_affine, dim=-1)

        std = np.sqrt(1.0 / input_dim)
        initializer = I.NormalInitializer(loc=0., scale=std)
        v_affine = dg.Linear(input_dim, attention_dim, param_attr=initializer)
        self.v_affine = weight_norm(v_affine, dim=-1)

        std = np.sqrt(1.0 / attention_dim)
        initializer = I.NormalInitializer(loc=0., scale=std)
        out_affine = dg.Linear(attention_dim,
                               input_dim,
                               param_attr=initializer)
        self.out_affine = weight_norm(out_affine, dim=-1)

        self.keep_prob = keep_prob
        self.has_bias = has_bias
        self.bias_dim = bias_dim
        self.attention_dim = attention_dim
        self.position_encoding_weight = position_encoding_weight
Example #29
 def _get_conv_layer(self, in_channels, out_channels, kernel_size, stride,
                     padding, dilation, groups, bias, padding_mode,
                     input_dim):
     # Return a Linear layer when input_dim == 0, otherwise a Conv{1,2,3}D layer.
     if input_dim == 0:
         layer = dg.Linear(in_channels, out_channels, bias_attr=bias)
     else:
         layer_type = getattr(dg, 'Conv%dD' % input_dim)
         layer = layer_type(
             num_channels=in_channels,
             num_filters=out_channels,
             filter_size=kernel_size,
             stride=stride,
             padding=padding,
             dilation=dilation,
             groups=groups,
             bias_attr=bias,
         )
     return layer
Example #30
    def __init__(self,
                 in_channels,
                 out_channels,
                 gain=2**(0.5),
                 use_wscale=False,
                 lrmul=1.0,
                 bias=True):
        """
            The complete conversion of Dense/FC/Linear Layer of original Tensorflow version.
        """
        super(FC, self).__init__()
        self.out_channels = out_channels

        he_std = gain * in_channels**(-0.5)  # He init
        if use_wscale:
            # init_std = 1.0 / lrmul
            # self.w_lrmul = he_std * lrmul
            self.w_lrmul = lrmul
        else:
            # init_std = he_std / lrmul
            # self.w_lrmul = lrmul
            self.w_lrmul = 1.0

        w = np.random.randn(in_channels, out_channels) * he_std * self.w_lrmul
        self.weight_attr = fluid.ParamAttr(
            initializer=fluid.initializer.NumpyArrayInitializer(w))
        #self.weight = layers.create_parameter((in_channels,out_channels),'float32')
        if bias:
            self.b_lrmul = lrmul
            b = np.random.randn(out_channels) * self.b_lrmul
            self.bias_attr = fluid.ParamAttr(
                initializer=fluid.initializer.NumpyArrayInitializer(b))
            # self.bias = layers.create_parameter((out_channels,),'float32')
        else:
            self.bias_attr = False

        self.linear = dygraph.Linear(in_channels,
                                     out_channels,
                                     param_attr=self.weight_attr,
                                     bias_attr=self.bias_attr,
                                     dtype='float32')