Пример #1
0
    def build_network(self, inputs):
        """Apply the spatial 3D convolution, then add a temporal residual.

        The spatial convolution uses the current ``self.kernel_dim`` /
        ``self.stride``; afterwards both attributes are redefined as purely
        temporal shapes for the residual path (APM output followed by a
        temporal-only convolution). Returns spatial output + residual.
        """
        # Spatial convolution with Kaiming (fan_out) initialised weights.
        spatial_init = flow.kaiming_initializer(shape=inputs.shape, mode="fan_out")
        spatial_out = conv3d_layer(
            self.name,
            inputs,
            self.conv2d.out_channels,
            kernel_size=self.kernel_dim,
            strides=self.stride,
            padding="SAME",
            use_bias=True,
            weight_initializer=spatial_init,
            trainable=self.trainable,
        )

        # Switch kernel/stride to temporal-only shapes for the residual path.
        self.kernel_dim = [self.time_dim, 1, 1]
        self.stride = [self.time_stride * self.time_dim, 1, 1]

        residual = self.APM.build_network(spatial_out)
        temporal_init = flow.kaiming_initializer(shape=residual.shape, mode="fan_out")
        residual = conv3d_layer(
            "APP3DC_temporal_" + str(self.time),
            residual,
            self.conv2d.out_channels,
            kernel_size=self.kernel_dim,
            strides=self.stride,
            padding="VALID",
            use_bias=False,
            weight_initializer=temporal_init,
            trainable=self.trainable,
        )

        # Module-level counter bumped per call (presumably to keep layer
        # names unique across instances — TODO confirm against callers).
        global time
        time += 1

        return spatial_out + residual
Пример #2
0
    def build_network(self, inputs):
        """Spatial 3D convolution followed by a temporal residual branch.

        A timestamp-based suffix keeps the temporal convolution's layer
        name unique across repeated invocations.
        """
        w_init = flow.kaiming_initializer(
            shape=inputs.shape, mode="fan_out", nonlinearity="relu")
        out = conv3d_layer(
            self.name,
            inputs,
            self.conv2d.out_channels,
            kernel_size=self.kernel_dim,
            strides=self.stride,
            padding="SAME",
            use_bias=True,
            weight_initializer=w_init,
            trainable=self.trainable,
        )

        # Re-purpose kernel/stride as temporal-only shapes for the residual.
        self.kernel_dim = [self.time_dim, 1, 1]
        self.stride = [self.time_stride * self.time_dim, 1, 1]

        residual = self.APM.build_network(out)
        res_init = flow.kaiming_initializer(
            shape=residual.shape, mode="fan_out", nonlinearity="relu")

        # Timestamp suffix makes the variable name unique per call.
        stamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S.%f')
        residual = conv3d_layer(
            "APP3DC_temporal_" + stamp,
            residual,
            self.conv2d.out_channels,
            kernel_size=self.kernel_dim,
            strides=self.stride,
            padding="VALID",
            use_bias=False,
            weight_initializer=res_init,
            trainable=self.trainable,
        )

        return out + residual
Пример #3
0
def inflate_conv(inputs,
                 conv2d,
                 time_dim=1,
                 time_padding=0,
                 time_stride=1,
                 time_dilation=1,
                 center=False,
                 times=0,
                 trainable=True):
    """Inflate a 2D convolution spec into a 3D (temporal) convolution.

    The spatial kernel / stride / dilation are copied from ``conv2d`` and a
    temporal dimension is prepended to each.

    Args:
        inputs: 5D input blob (used for initializer shape).
        conv2d: 2D conv module whose ``kernel_size`` / ``stride`` /
            ``padding`` / ``dilation`` / ``out_channels`` describe the
            spatial part. ``padding`` and ``dilation`` may be an int or an
            (h, w) sequence; ``kernel_size`` and ``stride`` must be indexable.
        time_dim: temporal kernel size.
        time_padding: temporal padding. NOTE(review): currently unused —
            the layer is built with padding="SAME"; kept for compatibility.
        time_stride: temporal stride.
        time_dilation: temporal dilation.
        center: unused; kept for interface compatibility with the original
            weight-inflation variant (centre-initialised temporal kernel).
        times: integer suffix used to build a unique layer name.
        trainable: whether the convolution weights are trainable.

    Returns:
        Output blob of the 3D convolution.
    """
    # Timestamp keeps the layer name unique across calls.
    name = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')

    def _as_pair(value):
        # A conv2d attribute may be a single int or an (h, w) sequence.
        # (The original only accepted int or list, so a tuple raised
        # NameError downstream.)
        if isinstance(value, int):
            return value, value
        return value[0], value[1]

    kernel_dim = [time_dim, conv2d.kernel_size[0], conv2d.kernel_size[1]]

    padding1, padding2 = _as_pair(conv2d.padding)
    # NOTE: computed but unused while padding="SAME" is hard-coded below.
    padding = [0, 0, time_padding, padding1, padding2]

    # BUGFIX: the second spatial stride previously reused conv2d.stride[0];
    # kernel_dim and dilation both use indices [0] and [1].
    stride = [time_stride, conv2d.stride[0], conv2d.stride[1]]

    dilation1, dilation2 = _as_pair(conv2d.dilation)
    dilation = [time_dilation, dilation1, dilation2]

    init = flow.kaiming_initializer(shape=inputs.shape,
                                    mode="fan_out",
                                    nonlinearity="relu")
    output = conv3d_layer("inflate_conv_" + str(times) + "_" + name,
                          inputs,
                          conv2d.out_channels,
                          kernel_size=kernel_dim,
                          dilation_rate=dilation,
                          strides=stride,
                          padding="SAME",
                          weight_initializer=init,
                          trainable=trainable)
    return output
Пример #4
0
 def kaiming_normal_(
     self, a=0, mode="fan_in", nonlinearity="leaky_relu", *, data_format="NCHW"
 ):
     """Initialize this tensor in place with Kaiming (He) normal values.

     Builds a kaiming initializer configuration with ``random_normal``
     distribution over this tensor's own shape, then applies it via
     ``_init_by_initializer_conf``.

     Args:
         a: negative slope passed as ``negative_slope`` (relevant for
            ``nonlinearity="leaky_relu"``).
         mode: fan mode forwarded to ``flow.kaiming_initializer``
            (e.g. "fan_in" / "fan_out").
         nonlinearity: name of the following non-linearity.
         data_format: layout used to compute fan values ("NCHW"/"NHWC");
            keyword-only.

     Returns:
         Whatever ``_init_by_initializer_conf`` returns for the built
         initializer configuration.
     """
     initializer_conf = flow.kaiming_initializer(
         shape=self.shape,
         distribution="random_normal",
         mode=mode,
         nonlinearity=nonlinearity,
         negative_slope=a,
         data_format=data_format,
     )
     return self._init_by_initializer_conf(initializer_conf)
Пример #5
0
def inflate_linear(inputs, linear2d, time_dim, trainable=True):
    """Build a dense layer whose width matches ``linear2d.out_features``.

    Weights are freshly initialised with Kaiming fan_out / relu.
    ``time_dim`` is unused here — it belonged to the abandoned
    weight-inflation scheme (tile 2D weights across time and rescale);
    kept for interface compatibility.
    """
    kaiming = flow.kaiming_initializer(
        shape=inputs.shape, mode="fan_out", nonlinearity="relu")
    return flow.layers.dense(
        inputs,
        linear2d.out_features,
        kernel_initializer=kaiming,
        trainable=trainable,
    )
Пример #6
0
def Block(x, config, name='Block_'):
    """One transformer block: causal self-attention followed by a GELU MLP.

    NOTE(review): unlike the standard GPT block, the MLP output is not
    added back to its input (there is no second residual connection) —
    confirm this is intentional.
    """
    # Attention sub-layer; the residual is taken from the normalised input.
    normed = flow.layers.layer_norm(x, name=name + 'l1')
    x = normed + Causal_Self_Attention(normed, config, name=name + 'attentions')

    # MLP sub-layer: expand 4x with GELU, then project back to n_embd.
    x = flow.layers.layer_norm(x, name=name + 'l2')
    hidden = flow.layers.dense(
        inputs=x,
        units=4 * config.n_embd,
        kernel_initializer=flow.kaiming_initializer(
            shape=(config.n_embd, 4 * config.n_embd)),
        activation=flow.math.gelu,
        name=name + 'gelu')
    x = flow.layers.dense(
        inputs=hidden,
        units=config.n_embd,
        kernel_initializer=flow.kaiming_initializer(
            shape=(4 * config.n_embd, config.n_embd)),
        name=name + 'dense')

    return flow.nn.dropout(x, rate=config.resid_pdrop)
Пример #7
0
 def build_network(self,inputs):
     """Build a single I3D-style 3D convolution over the input blob.

     Creates a conv3d layer named "conv_I3D_" with Kaiming fan_out/relu
     initialised weights; kernel size, stride and padding come from the
     instance attributes set elsewhere.
     """
     # PyTorch's repeat ==> numpy's tile.
     # NOTE(review): the commented block below is the abandoned
     # weight-inflation approach — because numpy's zeros() produced a
     # plain numpy object for weight_3d, flow ops could not be applied to
     # it, so any follow-up work would have to restart from zeros.
     # oneflow.repeat(input: oneflow.python.framework.remote_blob.BlobDef, repeat_num: int, 
     # name: Optional[str] = None) → oneflow.python.framework.remote_blob.BlobDef
     #weight_3d=flow.repeat(weight_3d,)
     # weight_2d=self.conv2d.weight.data
     # weight_3d=np.zeros(weight_2d.shape)
     # weight_3d=flow.expand_dims(weight_3d,axis=2)
     # weight_3d=np.tile(weight_3d,(1,1,self.time_dim,1,1))
     # middle_dix=self.time_dim//2   (sic: typo for middle_idx)
     # weight_3d[:, :, middle_idx, :, :] = weight_2d
     # init=flow.constant_initializer(weight_3d)
     #init=flow.random_uniform_initializer(minval=0, maxval=0.5)
     init=flow.kaiming_initializer(shape=inputs.shape,mode="fan_out",nonlinearity="relu")
     output=conv3d_layer("conv_I3D_",inputs,self.conv2d.out_channels,
             kernel_size=self.kernel_dim,strides=self.stride, padding=self.padding,
             use_bias=True, weight_initializer=init,trainable=self.trainable
     )
     return output
Пример #8
0
    def test_float_initializer(test_case):
        """Verify every float32 initializer yields its expected distribution."""
        # Initializers with fixed parameters.
        initializers = [
            flow.random_normal_initializer(mean=3, stddev=4),
            flow.random_uniform_initializer(minval=-6, maxval=18),
            flow.truncated_normal_initializer(mean=-5, stddev=8),
            flow.xavier_uniform_initializer(data_format="NCHW"),
            flow.xavier_uniform_initializer(data_format="NHWC"),
            flow.xavier_normal_initializer(data_format="NCHW"),
            flow.xavier_normal_initializer(data_format="NHWC"),
            flow.constant_initializer(value=4),
            flow.ones_initializer(),
            flow.zeros_initializer(),
        ]

        # Parameter sweeps for kaiming and variance-scaling initializers.
        kaiming_args = GenArgDict(
            OrderedDict(
                shape=[SHAPE],
                mode=["fan_in", "fan_out", "fan_avg"],
                distribution=["random_normal", "random_uniform"],
                data_format=["NCHW", "NHWC"],
                negative_slope=[0.5],
            ))
        vs_args = GenArgDict(
            OrderedDict(
                scale=[3.4],
                mode=["fan_in", "fan_out", "fan_avg"],
                distribution=[
                    "truncated_normal", "random_normal", "random_uniform"
                ],
                data_format=["NCHW", "NHWC"],
            ))
        initializers.extend(
            flow.kaiming_initializer(**args) for args in kaiming_args)
        initializers.extend(
            flow.variance_scaling_initializer(**args) for args in vs_args)

        for initializer in initializers:
            CompareTwoDistribution(test_case, flow.float32, initializer)
Пример #9
0
    def build_network(self, inputs):
        """Aggregate each frame with its temporal neighbourhood.

        For every time step, the ``time_dim``-sized neighbourhood
        (excluding the centre frame) is registered against the frame via a
        semantic-similarity soft attention, optionally re-weighted by a
        contrastive attention map, and the original frames and their
        aggregated neighbours are interleaved along the time axis.

        Args:
            inputs: 5D blob of shape (b, c, t, h, w).

        Returns:
            Blob of shape (b, c, N*t, h, w), N == self.time_dim, that
            interleaves original frames with aggregated neighbour frames.
        """
        b, c, t, h, w = inputs.shape
        N = self.time_dim

        def _split_frames(blob, indices):
            # One single-frame slice (along axis 2, the time axis) per index.
            return [
                flow.slice(
                    blob,
                    begin=[None, None, int(index), None, None],
                    size=[None, None, 1, None, None],
                ) for index in indices
            ]

        # Indices of the N-1 neighbour frames for each time step, flattened
        # to shape (t * (N-1),); i runs over window offsets except centre.
        templist = []
        for i in range(N):
            if i != N // 2:
                out = flow.range(t, dtype=flow.int64)
                one = flow.constant_like(out, i, dtype=flow.int64)
                out = flow.math.add(out, one)
                out = flow.expand_dims(out, axis=0)
                templist.append(out)
        neighbor_time_index = flow.concat(templist, axis=0)
        neighbor_time_index = flow.transpose(neighbor_time_index, [1, 0])
        neighbor_time_index = flow.flatten(
            neighbor_time_index, start_dim=0, end_dim=-1)

        # ---- feature map registration ---------------------------------
        # Timestamp keeps the layer names unique across calls.
        tempname = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S.%f')

        init = flow.kaiming_initializer(
            shape=inputs.shape, mode="fan_out", nonlinearity="relu")
        semantic = conv3d_layer(
            "conv_semantic_" + tempname, inputs, self.out_channels,
            kernel_size=1, use_bias=False, padding="VALID",
            trainable=self.trainable, weight_initializer=init)

        inputs_norm = flow.math.l2_normalize(semantic, axis=1)

        # Pad the time axis so every frame has a full neighbourhood.
        inputs_norm_padding = flow.pad(
            inputs_norm,
            paddings=[(0, 0), (0, 0),
                      ((self.time_dim - 1) // 2, (self.time_dim - 1) // 2),
                      (0, 0), (0, 0)])

        # Replicate the normalised features N-1 times along a new axis 3.
        inputs_norm_expand = flow.expand_dims(inputs_norm, axis=3)
        base_norm_expand = inputs_norm_expand
        for _ in range(N - 2):
            inputs_norm_expand = flow.concat(
                inputs=[inputs_norm_expand, base_norm_expand], axis=3)

        inputs_norm_expand = flow.transpose(
            inputs_norm_expand, perm=[0, 2, 3, 4, 5, 1])
        inputs_norm_expand = flow.reshape(
            inputs_norm_expand, shape=[-1, h * w, c // 16])

        neighbor_norm = flow.concat(
            _split_frames(inputs_norm_padding, neighbor_time_index), axis=2)
        neighbor_norm = flow.transpose(neighbor_norm, perm=[0, 2, 1, 3, 4])
        neighbor_norm = flow.reshape(
            neighbor_norm, shape=[-1, c // 16, h * w])

        # Temperature-scaled similarity -> soft attention weights.
        similarity = flow.matmul(
            inputs_norm_expand, neighbor_norm) * self.temperature
        similarity = nn.softmax(similarity, axis=-1)

        inputs_padding = flow.pad(
            inputs,
            paddings=[(0, 0), (0, 0),
                      ((self.time_dim - 1) // 2, (self.time_dim - 1) // 2),
                      (0, 0), (0, 0)])
        neighbor = flow.concat(
            _split_frames(inputs_padding, neighbor_time_index), axis=2)
        neighbor = flow.transpose(neighbor, perm=[0, 2, 3, 4, 1])
        neighbor = flow.reshape(neighbor, shape=[-1, h * w, c])

        # Weighted sum of neighbour features.
        neighbor_new = flow.matmul(similarity, neighbor)
        neighbor_new = flow.reshape(
            neighbor_new, shape=[b, t * (N - 1), h, w, c])
        neighbor_new = flow.transpose(neighbor_new, perm=[0, 4, 1, 2, 3])

        # ---- contrastive attention ------------------------------------
        if self.contrastive_att:
            temp_input = flow.expand_dims(inputs, axis=3)
            base_input = temp_input
            for _ in range(N - 2):
                temp_input = flow.concat(
                    inputs=[temp_input, base_input], axis=3)
            temp_input = flow.reshape(
                temp_input, shape=[b, c, (N - 1) * t, h, w])
            input_att = conv3d_layer(
                "conv3d_inputmapping_" + tempname, temp_input,
                self.out_channels, kernel_size=1, use_bias=False,
                trainable=False,
                weight_initializer=flow.kaiming_initializer(
                    shape=temp_input.shape, mode="fan_out",
                    nonlinearity="relu"))
            n_att = conv3d_layer(
                "conv3d_nmapping_" + tempname, neighbor_new,
                self.out_channels, kernel_size=1, use_bias=False,
                trainable=False,
                weight_initializer=flow.kaiming_initializer(
                    shape=neighbor_new.shape, mode="fan_out",
                    nonlinearity="relu"))
            temp_input = input_att * n_att
            contrastive_att_net = conv3d_layer(
                "conv3d_att_net_" + tempname, temp_input, 1,
                kernel_size=1, use_bias=False, trainable=self.trainable,
                weight_initializer=flow.kaiming_initializer(
                    shape=temp_input.shape, mode="fan_out",
                    nonlinearity="relu"))
            contrastive_att_net = flow.math.sigmoid(contrastive_att_net)
            neighbor_new = flow.math.multiply(
                neighbor_new, contrastive_att_net)

        # ---- integrate feature maps -----------------------------------
        init = flow.zeros_initializer()
        tempname = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S.%f')

        input_offset = flow.get_variable(
            "input_offset_" + tempname,
            shape=(b, c, N * t, h, w),
            initializer=init,
            dtype=inputs.dtype,
            trainable=self.trainable)

        # BUGFIX: the original `with` statement had no indented body, which
        # is a SyntaxError; the index computation now runs inside the scope.
        with flow.scope.placement("cpu", "0:0"):
            # Positions of original frames (centre of each window) and of
            # neighbour frames in the interleaved (N*t) time layout.
            input_index = np.array(
                [i for i in range(t * N) if i % N == N // 2])
            neighbor_index = np.array(
                [i for i in range(t * N) if i % N != N // 2])

        input_offset_list = _split_frames(
            input_offset, range(input_offset.shape[2]))
        inputs_list = _split_frames(inputs, range(inputs.shape[2]))
        neighbor_new_list = _split_frames(
            neighbor_new, range(neighbor_new.shape[2]))

        # Scatter original frames and aggregated neighbours into the
        # interleaved time layout.
        for temp_index, index in enumerate(input_index):
            input_offset_list[index] += inputs_list[temp_index]
        for temp_index, index in enumerate(neighbor_index):
            input_offset_list[index] += neighbor_new_list[temp_index]

        return flow.concat(input_offset_list, axis=2)
Пример #10
0
def Causal_Self_Attention(x, config, name='csa'):
    """Multi-head causal self-attention over embedded tokens.

    Args:
        x: embedded input of shape [B, T, C] where B is the batch size,
            T the sequence length (block_size) and C the embedding width
            (n_embd); C / n_head is the per-head dimension.
        config: model configuration (models.GPTConfig) providing n_embd,
            n_head, attn_pdrop and resid_pdrop.
        name: scope prefix for all layer names.

    Returns:
        Attention output of shape [B, T, C], usable as the input of the
        next block.

    Code adapted from:
        https://github.com/karpathy/minGPT/blob/master/mingpt/model.py
    Background: http://jalammar.github.io/illustrated-gpt2/
    """
    assert config.n_embd % config.n_head == 0

    B, T, C = x.shape
    n_head = config.n_head
    head_dim = C // n_head
    kaiming_init_C = flow.kaiming_initializer(shape=(C, C))

    def _project(suffix):
        # Linear projection followed by a split into heads:
        # (B, T, C) -> (B, n_head, T, head_dim).
        proj = flow.layers.dense(x,
                                 units=config.n_embd,
                                 kernel_initializer=kaiming_init_C,
                                 name=(name + suffix))
        proj = flow.reshape(proj, [B, T, n_head, head_dim])
        return flow.transpose(proj, [0, 2, 1, 3])

    # query scores against every key; keys label the tokens; values carry
    # the actual token representations.
    query = _project('_query')
    key = _project('_key')
    value = _project('value')  # NOTE: suffix kept without '_' as in original

    # Scaled dot-product: (B, nh, T, hs) x (B, nh, hs, T) -> (B, nh, T, T).
    scores = flow.matmul(query, flow.transpose(key, [0, 1, 3, 2]))
    scores = scores * (1.0 / math.sqrt(key.shape[-1]))

    # Causal mask: tril(-1) + 1 leaves ones strictly above the diagonal;
    # those future positions are filled with -inf before the softmax.
    mask = flow.math.tril(
        flow.constant(value=int(-1),
                      dtype=flow.int32,
                      shape=(B, config.n_head, T, T),
                      name=name + "_ConstantLike_tril"))
    mask = mask + flow.ones_like(like=mask, dtype=flow.int32)
    scores = flow.masked_fill(scores, mask, float('-inf'))
    att = flow.nn.softmax(scores, name=name + 'att')
    att = flow.nn.dropout(att, config.attn_pdrop)

    # Attention-weighted values, merged back to (B, T, C).
    y = flow.matmul(att, value)
    y = flow.transpose(y, [0, 2, 1, 3])
    y = flow.reshape(y, [B, T, C])
    return flow.nn.dropout(y, config.resid_pdrop)