def forward_one(x, target, label, train_flag):
    # make input window vector
    distance = window // 2
    char_vecs = list()
    char_type_vecs = list()
    x = list(x)
    for i in range(distance):
        x.append('</s>')
        x.insert(0,'<s>')
    for i in range(-distance+1 , distance + 2):
        char = x[target + i]
        char_id = char2id[char]
        char_vec = model.embed(get_onehot(char_id))
        char_vecs.append(char_vec)
    char_concat = F.concat(tuple(char_vecs))
    for i in range(-distance+1 , distance + 2):
        char = x[target + i]
        char_type = make_char_type(char)
        char_type_id = char_type2id[char_type]
        char_type_vec = model.char_type_embed(get_onehot(char_type_id))
        char_type_vecs.append(char_type_vec)
    char_type_concat = F.concat(tuple(char_type_vecs))
    #dropout_concat = F.dropout(concat, ratio=dropout_rate, train=train_flag)
    concat = F.concat((char_concat, char_type_concat))
    hidden = F.sigmoid(model.hidden1(concat))
    output = model.output(hidden)
    dist = F.softmax(output)
    #print(dist.data, label, np.argmax(dist.data))
    correct = get_onehot(label)
    #print(output.data, correct.data)
    return np.argmax(dist.data), F.softmax_cross_entropy(output, correct)
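The helper get_onehot is used throughout these snippets but never shown; despite its name, both uses (as input to model.embed, an EmbedID-style link, and as the target of F.softmax_cross_entropy) suggest it simply wraps one integer ID in an int32 Variable. A minimal sketch under that assumption:

import numpy as np
from chainer import Variable

def get_onehot(idx):
    # Wrap a single integer ID as an int32 Variable of shape (1,).
    return Variable(np.array([idx], dtype=np.int32))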
Example #2
File: seq2seq.py  Project: hvy/chainer
    def forward(self, xs, ys):
        xs = [x[::-1] for x in xs]

        eos = self.xp.array([EOS], numpy.int32)
        ys_in = [F.concat([eos, y], axis=0) for y in ys]
        ys_out = [F.concat([y, eos], axis=0) for y in ys]

        # Both xs and ys_in are lists of arrays.
        exs = sequence_embed(self.embed_x, xs)
        eys = sequence_embed(self.embed_y, ys_in)

        batch = len(xs)
        # None represents a zero vector in an encoder.
        hx, cx, _ = self.encoder(None, None, exs)
        _, _, os = self.decoder(hx, cx, eys)

        # It is faster to concatenate data before calculating loss
        # because only one matrix multiplication is called.
        concat_os = F.concat(os, axis=0)
        concat_ys_out = F.concat(ys_out, axis=0)
        loss = F.sum(F.softmax_cross_entropy(
            self.W(concat_os), concat_ys_out, reduce='no')) / batch

        chainer.report({'loss': loss}, self)
        n_words = concat_ys_out.shape[0]
        perp = self.xp.exp(loss.array * batch / n_words)
        chainer.report({'perp': perp}, self)
        return loss
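sequence_embed is referenced above but not defined in the snippet. In the standard Chainer seq2seq example it embeds a list of variable-length sequences with a single embedding call and splits the result back per sequence, the same pattern as norm_embed in Example #33 below. A minimal sketch of that pattern:

import numpy as np
import chainer.functions as F

def sequence_embed(embed, xs):
    # Concatenate the sequences, embed them in one call, then split back.
    x_len = [len(x) for x in xs]
    x_section = np.cumsum(x_len[:-1])
    ex = embed(F.concat(xs, axis=0))
    return F.split_axis(ex, x_section, 0)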
Example #3
    def render(self, vertices, faces, textures):
        # fill back
        if self.fill_back:
            faces = cf.concat((faces, faces[:, :, ::-1]), axis=1).data
            textures = cf.concat((textures, textures.transpose((0, 1, 4, 3, 2, 5))), axis=1)

        # lighting
        faces_lighting = neural_renderer.vertices_to_faces(vertices, faces)
        textures = neural_renderer.lighting(
            faces_lighting,
            textures,
            self.light_intensity_ambient,
            self.light_intensity_directional,
            self.light_color_ambient,
            self.light_color_directional,
            self.light_direction)

        # viewpoint transformation
        if self.camera_mode == 'look_at':
            vertices = neural_renderer.look_at(vertices, self.eye)
        elif self.camera_mode == 'look':
            vertices = neural_renderer.look(vertices, self.eye, self.camera_direction)

        # perspective transformation
        if self.perspective:
            vertices = neural_renderer.perspective(vertices, angle=self.viewing_angle)

        # rasterization
        faces = neural_renderer.vertices_to_faces(vertices, faces)
        images = neural_renderer.rasterize(
            faces, textures, self.image_size, self.anti_aliasing, self.near, self.far, self.rasterizer_eps,
            self.background_color)
        return images
Example #4
 def __call__(self, src, is_train=False, xp=np):
     # Some namings
     B  = len(src)      # Batch Size
     N  = len(src[0])   # length of source
     H  = self.H
     src_col = lambda x: Variable(self.xp.array([src[i][x] for i in range(B)], dtype=np.int32))
     embed   = lambda e, x: e(self.IE(x), is_train=is_train)
     bi_rnn  = lambda x, y: self.AE(F.concat((x[0], y[1]), axis=1))
     concat_source = lambda S, s: s if S is None else F.concat((S, s), axis=2)
     # State Reset
     self.EF.reset_state()
     self.EB.reset_state()
    
     # Forward + backward encoding
     s = []
     for j in range(N):
         s.append((
             embed(self.EF, src_col(j)),
             embed(self.EB, src_col(-j-1))
         ))
     
     # Joining the encoding data together
     S = None
     for j in range(N):
         s_j = bi_rnn(s[j], s[-j-1])
         S = concat_source(S, F.reshape(s_j, (B, H, 1)))
     S = F.swapaxes(S, 1, 2)
     return S, s_j
Example #5
File: net.py  Project: cavy-t/GRU-PredNet
    def __call__(self, x):
        for nth in range(self.layers):
            if getattr(self, 'P' + str(nth)) is None:
                setattr(self, 'P' + str(nth), variable.Variable(
                    self.xp.zeros(self.sizes[nth], dtype=x.data.dtype),
                    volatile='auto'))

        E = [None] * self.layers
        for nth in range(self.layers):
            if nth == 0:
                E[nth] = F.concat((F.relu(x - getattr(self, 'P' + str(nth))),
                                  F.relu(getattr(self, 'P' + str(nth)) - x)))
            else:
                A = F.max_pooling_2d(F.relu(getattr(self, 'ConvA' + str(nth))(E[nth - 1])), 2, stride = 2)
                E[nth] = F.concat((F.relu(A - getattr(self, 'P' + str(nth))),
                                  F.relu(getattr(self, 'P' + str(nth)) - A)))

        R = [None] * self.layers
        for nth in reversed(range(self.layers)):
            if nth == self.layers - 1:
                R[nth] = getattr(self, self.rnn_module + str(nth))((E[nth],))
            else:
                upR = F.unpooling_2d(R[nth + 1], 2, stride = 2, cover_all=False)
                R[nth] = getattr(self, self.rnn_module + str(nth))((E[nth], upR))

            if nth == 0:
                setattr(self, 'P' + str(nth), F.clipped_relu(getattr(self, 'ConvP' + str(nth))(R[nth]), 1.0))
            else:
                setattr(self, 'P' + str(nth), F.relu(getattr(self, 'ConvP' + str(nth))(R[nth])))
        
        return self.P0
Example #6
  def forward(self, data):
    self.reset_state()
    
    x_list = [XP.iarray([d[0]]) for d in data]
    ep_list = [self.p_embed(x) for x in x_list]
    ec_list = [self.c_embed(x) for x in x_list]
    er_list = [self.r_embed(x) for x in x_list]
    p_list = self.p_encode(ep_list)
    c_list = self.c_encode(ec_list)
    r_list = self.r_encode(er_list)

    P = functions.reshape(
      functions.concat(p_list, 0),
      (1, len(data), self.hidden_size))
    C = functions.reshape(
      functions.concat(c_list, 0),
      (1, len(data), self.hidden_size))
    R = functions.concat(r_list, 0)

    parent_scores = functions.reshape(
      functions.batch_matmul(C, P, transb=True),
      (len(data), len(data)))
    root_scores = functions.reshape(
      self.r_scorer(R),
      (1, len(data)))

    return parent_scores, root_scores
Example #7
    def __call__(self, x, y):
        """

        :param x: ミニバッチの入力データ
        :param y: 入力データに対応するミニバッチの出力
        :return: 誤差
        """

        batch_size = len(x)
        eos = self.xp.array([EOS], dtype='int32')

        # Prepend / append the EOS token
        y_in = [F.concat((eos, tmp), axis=0) for tmp in y]
        y_out = [F.concat((tmp, eos), axis=0) for tmp in y]

        # Embedding Layer
        emb_x = [self.x_embed(tmp) for tmp in x]
        emb_y = [self.y_embed(tmp) for tmp in y_in]

        # Inputs to the encoder and decoder
        h, c, a = self.encoder(None, None, emb_x)  # h => hidden, c => cell, a => output(Attention)
        _, _, dec_hs = self.decoder(h, c, emb_y)  # dec_hs=> output

        # Compute the output layer
        loss = 0
        accuracy = 0
        for dec_h, t, attention in zip(dec_hs, y_out, a):
            # o = self.y(dec_h)
            o = self.global_attention_layer(dec_h, attention)  # compute the attention layer
            loss += F.softmax_cross_entropy(o, t)  # loss
            accuracy += F.accuracy(o, t)  # accuracy
        loss /= batch_size
        accuracy /= batch_size

        return loss, accuracy
Example #8
    def __call__(self, x, train=True):
        # First Convolution
        c0 = self.bnF1(x)
        c0 = self.convF1(c0)
        c0 = F.relu(c0)

        c0 = self.bnF2(c0)
        c0 = self.convF2(c0)
        c0 = F.relu(c0)

        c0 = self.bnF3(c0)
        c0 = self.convF3(c0)
        c0 = F.relu(c0)

        # Atrous Convolution size 3
        cn3 = self.bn1x1_D3(c0)
        cn3 = self.conv1x1_D3(cn3)
        cn3 = self.bnD3(cn3)
        cn3 = self.dilate_conv6(cn3)
        cn3 = F.relu(cn3)

        # Atrous Convolution size 6
        cn3_con = F.concat((c0, cn3), axis=1)
        cn6 = self.bn1x1_D6(cn3_con)
        cn6 = self.conv1x1_D6(cn6)
        cn6 = self.bnD6(cn6)
        cn6 = self.dilate_conv6(cn6)
        cn6 = F.relu(cn6)

        # Atrous Convolution size 12
        cn6_con = F.concat((cn3_con, cn6), axis=1)
        cn12 = self.bn1x1_D12(cn6_con)
        cn12 = self.conv1x1_D12(cn12)
        cn12 = self.bnD12(cn12)
        cn12 = self.dilate_conv12(cn12)
        cn12 = F.relu(cn12)

        # Atrous Convolution size 18
        cn12_con = F.concat((cn6_con, cn12), axis=1)
        cn18 = self.bn1x1_D18(cn12_con)
        cn18 = self.conv1x1_D18(cn18)
        cn18 = self.bnD18(cn18)
        cn18 = self.dilate_conv18(cn18)
        cn18 = F.relu(cn18)

        # Atrous Convolution size 24
        cn18_con = F.concat((cn12_con, cn18), axis=1)
        cn24 = self.bn1x1_D24(cn18_con)
        cn24 = self.conv1x1_D24(cn24)
        cn24 = self.bnD24(cn24)
        cn24 = self.dilate_conv24(cn24)
        cn24 = F.relu(cn24)

        # Last convolution
        cn24_con = F.concat((cn18_con, cn24), axis=1)
        cL = self.bnL(cn24_con)
        cL = self.convL(cL)
        out = F.softmax(cL, axis=1)

        return out
Example #9
def forward_one(x, target, hidden, prev_c, train_flag):
    # make input window vector
    distance = window // 2
    char_vecs = list()
    x = list(x)
    for i in range(distance):
        x.append('</s>')
        x.insert(0,'<s>')
    for i in range(-distance+1 , distance + 2):
        char = x[target + i]
        char_id = char2id[char]
        char_vec = model.embed(get_onehot(char_id))
        char_vecs.append(char_vec)
    concat = F.concat(tuple(char_vecs))
    dropout_concat = F.dropout(concat, ratio=dropout_rate, train=train_flag)
    concat = F.concat((concat, hidden))
    i_gate = F.sigmoid(model.i_gate(concat))
    f_gate = F.sigmoid(model.f_gate(concat))
    o_gate = F.sigmoid(model.o_gate(concat))
    concat = F.concat((hidden, i_gate, f_gate, o_gate))
    prev_c, hidden = F.lstm(prev_c, concat)
    output = model.output(hidden)
    dist = F.softmax(output)
    #print(dist.data, label, np.argmax(dist.data))
    #correct = get_onehot(label)
    #print(output.data, correct.data)
    return dist
Example #10
    def forward(self, x):
        n_batch, n_atom, n_feature = x.shape
        atom_repeat = functions.reshape(x, (n_batch, 1, n_atom, n_feature))
        atom_repeat = functions.broadcast_to(
            atom_repeat, (n_batch, n_atom, n_atom, n_feature))
        atom_repeat = functions.reshape(atom_repeat,
                                        (n_batch, n_atom * n_atom, n_feature))

        atom_tile = functions.reshape(x, (n_batch, n_atom, 1, n_feature))
        atom_tile = functions.broadcast_to(
            atom_tile, (n_batch, n_atom, n_atom, n_feature))
        atom_tile = functions.reshape(atom_tile,
                                      (n_batch, n_atom * n_atom, n_feature))

        pair_x0 = functions.concat((atom_tile, atom_repeat), axis=2)
        pair_x0 = functions.reshape(pair_x0,
                                    (n_batch * n_atom * n_atom, n_feature * 2))
        for l in self.linear_layers:
            pair_x0 = l(pair_x0)
            pair_x0 = functions.relu(pair_x0)
        pair_x0 = functions.reshape(pair_x0,
                                    (n_batch, n_atom * n_atom, self.n_channel))

        pair_x1 = functions.concat((atom_repeat, atom_tile), axis=2)
        pair_x1 = functions.reshape(pair_x1,
                                    (n_batch * n_atom * n_atom, n_feature * 2))
        for l in self.linear_layers:
            pair_x1 = l(pair_x1)
            pair_x1 = functions.relu(pair_x1)
        pair_x1 = functions.reshape(pair_x1,
                                    (n_batch, n_atom * n_atom, self.n_channel))
        return pair_x0 + pair_x1
Example #11
def channel_normalize(x, test=False):
    s0, s1, s2, s3 = x.data.shape
    cavg = F.reshape(F.sum(x, axis=1) / s1, (s0, 1, s2, s3))
    xavg = F.concat(s1 * [cavg])
    cvar = F.reshape(F.sum((x - xavg) ** 2, axis=1) / s1, (s0, 1, s2, s3))
    xvar = F.concat(s1 * [cvar])
    return (x - xavg) / (xvar + 1e-5) ** 0.5
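A minimal usage sketch for channel_normalize above: it normalizes each spatial location across the channel axis by tiling the per-location mean and variance with F.concat (the input is assumed to be an NCHW Variable).

import numpy as np
import chainer.functions as F
from chainer import Variable

x = Variable(np.random.randn(2, 3, 8, 8).astype(np.float32))  # NCHW
y = channel_normalize(x)
print(y.shape)  # (2, 3, 8, 8), roughly zero mean / unit variance along the channel axis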
Example #12
File: cnn.py  Project: mana-ysh/deep-crf
    def compute_vecs(self, word_ids, word_boundaries, phrase_num,
                     char_vecs=None):
        word_ids = my_variable(word_ids, volatile=not self.train)
        word_embs = self.emb(word_ids)     # total_len x dim
        word_embs_reshape = F.reshape(word_embs, (1, 1, -1, self.emb_dim))

        if self.word_level_flag and char_vecs is not None:
            # print char_vecs.data.shape
            # print word_embs.data.shape
            word_embs = F.concat([word_embs, char_vecs], axis=1)
            # print word_embs.data.shape
            dim = self.emb_dim + self.add_dim
            word_embs_reshape = F.reshape(word_embs, (1, 1, -1, dim))

        # 1 x 1 x total_len x dim
        # convolution
        word_emb_conv = self.conv(word_embs_reshape)
        # 1 x dim x total_len x 1
        word_emb_conv_reshape = F.reshape(word_emb_conv,
                                          (self.hidden_dim, -1))
        # max
        word_emb_conv_reshape = F.split_axis(word_emb_conv_reshape,
                                             word_boundaries, axis=1)

        embs = [F.max(word_emb_conv_word, axis=1) for i, word_emb_conv_word in
                enumerate(word_emb_conv_reshape) if i % 2 == 1]
        embs = F.concat(embs, axis=0)
        phrase_emb_conv = F.reshape(embs,
                                    (phrase_num, self.hidden_dim))
        return phrase_emb_conv
Example #13
def forward_one(x, target, label, hidden_vec, prev_c):
    # make input window vector
    distance = window // 2
    char_vecs = list()
    x = list(x)
    for i in range(distance):
        x.append('</s>')
        x.insert(0,'<s>')
    for i in range(-distance, distance + 1):
        char = x[target + i]
        char_id = char2id[char]
        char_vec = model.embed(get_onehot(char_id))
        char_vecs.append(char_vec)
 
    concat = F.concat(tuple(char_vecs))
    concat = F.concat((concat, hidden_vec))
    i_gate = F.sigmoid(model.i_gate(concat))
    f_gate = F.sigmoid(model.f_gate(concat))
    o_gate = F.sigmoid(model.o_gate(concat))
    concat = F.concat((hidden_vec, i_gate, f_gate, o_gate))
    prev_c, hidden_vec = F.lstm(prev_c, concat)
    output = model.output(hidden_vec)
    pred = F.softmax(output)
    #pred = add_delta(pred)
    correct = get_onehot(label)
    return np.argmax(pred.data), F.softmax_cross_entropy(output, correct)
Example #14
File: rnnlm.py  Project: re53min/TOHO_AI
    def __call__(self, x):

        batch_size = len(x)
        eos = np.array([EOS], dtype='int32')

        # Prepend / append the EOS token
        in_x = [F.concat((eos, tmp), axis=0) for tmp in x]
        in_y = [F.concat((tmp, eos), axis=0) for tmp in x]

        # Embedding Layer
        emb_x = [self.embed(tmp) for tmp in in_x]

        # Input to the LSTM
        _, _, outputs = self.h(None, None, emb_x)  # returns (hidden, cell, per-step outputs)

        # Compute the output layer
        loss = 0
        for output, t in zip(outputs, in_y):
            o = self.y(output)
            # print(o.shape[0])
            # print(t[1:].shape[0])
            loss += F.softmax_cross_entropy(o, t)  # loss
        loss /= batch_size

        return loss
Example #15
def predict(node, neural_model_size, root=True):
    if isinstance(node['node'], np.ndarray):
        # leaf node
        word = np.reshape(node['node'], (1, neural_model_size))
        v = chainer.Variable(word)
    else:
        # internal node
        left_node, right_node = node['node']
        left = predict(left_node, neural_model_size, root=False)
        right = predict(right_node, neural_model_size,  root=False)
        intermediate = F.tanh(model.h(F.concat((left, right))))
        v = F.tanh(model.l(F.concat((left, right))))

    y = model.w(v)

    # evaluate root label
    if root:
        predicted = cuda.to_cpu(y.data).argmax(1)
        try:
            label = node['label']
            return predicted[0], label
        except:
            pass
        return predicted[0]

    return v
Example #16
  def _encode(self, x_list):
    batch_size = len(x_list[0])
    source_length = len(x_list)

    # Encoding
    fc = bc = f = b = _zeros((batch_size, self.hidden_size))
    i_list = [self.x_i(_mkivar(x)) for x in x_list]
    f_list = []
    b_list = []
    for i in i_list:
      fc, f = F.lstm(fc, self.i_f(i) + self.f_f(f))
      f_list.append(f)
    for i in reversed(i_list):
      bc, b = F.lstm(bc, self.i_b(i) + self.b_b(b))
      b_list.append(b)
    b_list.reverse()

    # Making concatenated matrix
    # {f,b}_mat: shape = [batch, srclen, hidden]
    f_mat = F.concat([F.expand_dims(f, 1) for f in f_list], 1)
    b_mat = F.concat([F.expand_dims(b, 1) for b in b_list], 1)
    # fb_mat: shape = [batch, srclen, 2 * hidden]
    fb_mat = F.concat([f_mat, b_mat], 2)
    # fbe_mat: shape = [batch * srclen, atten]
    fbe_mat = self.fb_e(
        F.reshape(fb_mat, [batch_size * source_length, 2 * self.hidden_size]))

    return fb_mat, fbe_mat, fc, bc, f_list[-1], b_list[0]
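_zeros and _mkivar are project helpers that are not part of this snippet; from their use here they presumably build a zero-filled float32 Variable (for the initial LSTM states) and an int32 Variable (for a batch of word IDs). A hedged sketch of what they might look like:

import numpy as np
import chainer

def _zeros(shape):
    # Zero-initialized float32 Variable, e.g. for initial LSTM cell/hidden states.
    return chainer.Variable(np.zeros(shape, dtype=np.float32))

def _mkivar(array):
    # Wrap an integer array, e.g. a batch of word IDs, as an int32 Variable.
    return chainer.Variable(np.asarray(array, dtype=np.int32))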
Example #17
    def __call__(self, *inputs):
        xs = inputs[:len(inputs) // 2]
        ys = inputs[len(inputs) // 2:]

        xs = [x[::-1] for x in xs]
        batch = len(xs)

        eos = self.xp.zeros(1, self.xp.int32)
        ys_in = [F.concat([eos, y], axis=0) for y in ys]
        ys_out = [F.concat([y, eos], axis=0) for y in ys]

        eys = sequence_embed(self.embed_y, ys_in)

        # Receive hidden states from encoder process and decode.
        _, _, os, _ = self.mn_decoder(eys)

        # It is faster to concatenate data before calculating loss
        # because only one matrix multiplication is called.
        concat_os = F.concat(os, axis=0)
        concat_ys_out = F.concat(ys_out, axis=0)
        loss = F.sum(F.softmax_cross_entropy(
            self.W(concat_os), concat_ys_out, reduce='no')) / batch

        reporter.report({'loss': loss.data}, self)
        n_words = concat_ys_out.shape[0]
        perp = self.xp.exp(loss.data * batch / n_words)
        reporter.report({'perp': perp}, self)
        return loss
Example #18
 def output_and_loss_from_seq_batch(self, y_seq_batch, t_seq_batch, normalize=None):
     y = F.concat(y_seq_batch, axis=0)
     y = F.dropout(y, ratio=self.dropout)
     t = F.concat(t_seq_batch, axis=0)
     loss = self.output.output_and_loss(y, t)
     if normalize is not None:
         loss *= 1. * t.shape[0] / normalize
     else:
         loss *= t.shape[0]
     return loss
Example #19
def forward_one(x,target, hidden, prev_c, model):
    # make input window vector
    distance = window // 2
    char_vecs = list()
    char_type_vecs = list()
    x = list(x)
    for i in range(distance):
        x.append('</s>')
        x.append('</s>')
        x.insert(0,'<s>')
        x.insert(0,'<s>')
    for i in range(-distance , distance+1):
        char = x[target+2 + i]
        try:
            char_id = char2id[char]
        except(KeyError):
            char_id = char2id['UNK']
            
        char_vec = model.embed(get_onehot(char_id))
        char_vecs.append(char_vec)
        bi_gram = x[target+2+i] + x[target+2+i+1]
        try:
            bi_gram_id = char2id[bi_gram]
        except(KeyError):
            bi_gram_id = char2id['UNK']
        bi_gram_char_vec = model.embed(get_onehot(bi_gram_id))
        char_vecs.append(bi_gram_char_vec)
    char_concat = F.concat(tuple(char_vecs))
    for i in range(-distance, distance+1):
        char = x[target+2+ i]
        pre_char = x[target+2+ i + 1]
        char_type = make_char_type(char)
        pre_char_type = make_char_type(pre_char)
        bi_gram_type = pre_char_type + char_type
        char_type_id = char_type2id[char_type]
        bigram_type_id = char_type2id[bi_gram_type]
        char_type_vec = model.char_type_embed(get_onehot(char_type_id))
        bigram_type_vec = model.char_type_embed(get_onehot(bigram_type_id))
        char_type_vecs.append(char_type_vec)
        char_type_vecs.append(bigram_type_vec)
    char_type_concat = F.concat(tuple(char_type_vecs))
    #dropout_concat = F.dropout(concat, ratio=dropout_rate, train=train_flag)
    concat = F.concat((char_concat, char_type_concat))
    concat = F.concat((concat, hidden))
    i_gate = F.sigmoid(model.i_gate(concat))
    f_gate = F.sigmoid(model.f_gate(concat))
    o_gate = F.sigmoid(model.o_gate(concat))
    concat = F.concat((hidden, i_gate, f_gate, o_gate))
    prev_c, hidden = F.lstm(prev_c, concat)
    output = model.output(hidden)
    dist = F.softmax(output)
    return np.argmax(dist.data)
Example #20
  def forward(self, data):
    self.reset_state()
    
    x_list = [XP.iarray([d[0]]) for d in data]
    pe_list = [self.p_embed(x) for x in x_list]
    ce_list = [self.c_embed(x) for x in x_list]
    re_list = [self.r_embed(x) for x in x_list]

    pf_list = []
    for pe in pe_list:
      pf_list.append(self.p_forward(pe))

    cf_list = []
    for ce in ce_list:
      cf_list.append(self.c_forward(ce))

    rf_list = []
    for re in re_list:
      rf_list.append(self.r_forward(re))

    pb_list = []
    for pe in reversed(pe_list):
      pb_list.append(self.p_backward(pe))

    cb_list = []
    for ce in reversed(ce_list):
      cb_list.append(self.c_backward(ce))

    rb_list = []
    for re in reversed(re_list):
      rb_list.append(self.r_backward(re))

    pc_list = [self.p_combine(pf, pb) for pf, pb in zip(pf_list, pb_list)]
    cc_list = [self.c_combine(cf, cb) for cf, cb in zip(cf_list, cb_list)]
    rc_list = [self.r_combine(rf, rb) for rf, rb in zip(rf_list, rb_list)]

    P = functions.reshape(
      functions.concat(pc_list, 0),
      (1, len(data), self.hidden_size))
    C = functions.reshape(
      functions.concat(cc_list, 0),
      (1, len(data), self.hidden_size))
    R = functions.concat(rc_list, 0)

    parent_scores = functions.reshape(
      functions.batch_matmul(C, P, transb=True),
      (len(data), len(data)))
    root_scores = functions.reshape(
      self.r_scorer(R),
      (1, len(data)))

    return parent_scores, root_scores
Example #21
    def single_step_forward(self, single_timestep_inputs, rel_rec, rel_send, single_timestep_rel_type):
        # single_timestep_inputs: [batch_size, num_sequences, num_nodes, feature_dims]
        # single_timestep_rel_type: [batch_size, num_sequences, num_edges, edge_types]
        batch_size, num_sequences, num_edges, _ = single_timestep_rel_type.shape
        _, num_nodes = rel_rec.shape

        # Node2edge
        # rel_rec: [num_edges, num_nodes]
        # rel_send: [num_edges, num_nodes]
        receivers = F.matmul(rel_rec, single_timestep_inputs)
        senders = F.matmul(rel_send, single_timestep_inputs)
        pre_msg = F.concat([receivers, senders], axis=-1)
        # pre_msg: [batch_size, num_sequences, num_edges, 2 * feature_dims]
        pre_msg = F.reshape(pre_msg, [batch_size * num_sequences * num_edges, -1])

        all_msgs = chainer.Variable(
            pre_msg.xp.zeros((batch_size, num_sequences, num_edges, self.msg_out_shape),
                             dtype=single_timestep_rel_type.dtype))
        if self.skip_first_edge_type:
            start_idx = 1
        else:
            start_idx = 0

        # Run separate MLP for every edge type
        # NOTE: To exclude one edge type, simply offset range by 1
        for i in range(start_idx, len(self.msg_fc2)):
            msg = F.relu(self.msg_fc1[i](pre_msg))
            msg = F.dropout(msg, self.dropout_prob)
            msg = F.relu(self.msg_fc2[i](msg))
            # msg: [batch_size * num_sequences * num_edges, msg_hid]
            msg = F.reshape(msg, [batch_size, num_sequences, num_edges, -1])
            msg = msg * single_timestep_rel_type[:, :, :, i:i + 1]
            all_msgs += msg

        # Aggregate all msgs to receiver
        # all_msgs: [batch_size, num_sequences, num_edges, msg_out_shape]
        # rel_rec: [num_edges, num_nodes]
        agg_msgs = F.matmul(rel_rec.T, all_msgs)

        # Skip connection
        aug_inputs = F.concat([single_timestep_inputs, agg_msgs], axis=-1)
        # aug_inputs: [batch_size, num_sequences, num_nodes, msg_out_shape + feature_dims]
        aug_inputs = F.reshape(aug_inputs, [batch_size * num_sequences * num_nodes, -1])

        # Output MLP
        pred = F.dropout(F.relu(self.out_fc1(aug_inputs)), self.dropout_prob)
        pred = F.dropout(F.relu(self.out_fc2(pred)), self.dropout_prob)
        pred = self.out_fc3(pred)
        pred = F.reshape(pred, [batch_size, num_sequences, num_nodes, -1])

        # Predict position/velocity difference
        return single_timestep_inputs + pred
Example #22
File: net.py  Project: asi1024/chainer
    def generate(self, x, condition):
        self.embed_queue = F.concat((self.embed_queue[:, :, 1:], x), axis=2)
        x = self.embed(self.embed_queue)
        if self.use_embed_tanh:
            x = F.tanh(x)
        x = F.relu(self.resnet.generate(x, condition))

        self.proj1_queue = F.concat((self.proj1_queue[:, :, 1:], x), axis=2)
        x = F.relu(self.proj1(self.proj1_queue))

        self.proj2_queue3 = F.concat((self.proj2_queue3[:, :, 1:], x), axis=2)
        x = self.proj2(self.proj2_queue3)
        return x
Example #23
    def __call__(self, imgs, questions):
        feat = self.feat_extractor(imgs)

        # Append relative coordinates to each location in the feature maps.
        n, c, h, w = feat.shape
        spatial_area = h * w

        xp = self.xp
        coords_h = xp.linspace(-1, 1, h, dtype=feat.dtype)
        coords_w = xp.linspace(-1, 1, w, dtype=feat.dtype)
        coords_hh, coords_ww = xp.meshgrid(coords_h, coords_w)
        coords_hh = coords_hh[None]
        coords_ww = coords_ww[None]
        coords = xp.concatenate((coords_hh, coords_ww), axis=0)
        coords = coords.reshape(2, -1)
        coords = coords[None]  # (1, 2, spatial_area * spatial_area)
        coords = xp.repeat(coords, n, axis=0)

        # Coordinates may be cached here but the performance gain is not
        # significant so it is skipped in favor of readability.

        feat = feat.reshape(n, c, spatial_area)
        h = F.concat((feat, coords), axis=1)  # (n, c + 2, spatial_area)

        # Create coordinate pairs (differentiable meshgrid).
        h_hh = F.expand_dims(h, 2)
        h_ww = F.expand_dims(h, 3)
        h_hh = F.repeat(h_hh, spatial_area, axis=2)
        h_ww = F.repeat(h_ww, spatial_area, axis=3)
        h = F.concat((h_hh, h_ww), axis=1)

        # Append questions to each coordinate pair.
        questions = questions.astype(imgs.dtype)
        questions = questions[:, :, None, None]
        questions = F.tile(questions, (1, 1, spatial_area, spatial_area))
        h = F.concat((h, questions), axis=1)
        # (n, (c + 2) * 2 + questions_length, spatial_area, spatial_area)

        # g.
        h = F.transpose(h, (0, 2, 3, 1))
        h = F.reshape(h, (n * spatial_area * spatial_area, -1))
        h = self.g(h)
        h = F.reshape(h, (n, spatial_area * spatial_area, -1))
        h = F.sum(h, axis=1)

        h = self.f(h)

        # Logits.
        h = self.fc(h)

        return h
Example #24
    def forward(self, x):
        y1 = self.model['conv1/7x7_s2'](x)
        h = F.relu(y1)
        h = F.local_response_normalization(self.pool_func(h, 3, stride=2), n=5)
        h = F.relu(self.model['conv2/3x3_reduce'](h))
        y2 = self.model['conv2/3x3'](h)
        h = F.relu(y2)
        h = self.pool_func(F.local_response_normalization(h, n=5), 3, stride=2)
        out1 = self.model['inception_3a/1x1'](h)
        out3 = self.model[
            'inception_3a/3x3'](F.relu(self.model['inception_3a/3x3_reduce'](h)))
        out5 = self.model[
            'inception_3a/5x5'](F.relu(self.model['inception_3a/5x5_reduce'](h)))
        pool = self.model[
            'inception_3a/pool_proj'](self.pool_func(h, 3, stride=1, pad=1))
        y3 = F.concat((out1, out3, out5, pool), axis=1)
        h = F.relu(y3)

        out1 = self.model['inception_3b/1x1'](h)
        out3 = self.model[
            'inception_3b/3x3'](F.relu(self.model['inception_3b/3x3_reduce'](h)))
        out5 = self.model[
            'inception_3b/5x5'](F.relu(self.model['inception_3b/5x5_reduce'](h)))
        pool = self.model[
            'inception_3b/pool_proj'](self.pool_func(h, 3, stride=1, pad=1))
        y4 = F.concat((out1, out3, out5, pool), axis=1)
        h = F.relu(y4)

        h = self.pool_func(h, 3, stride=2)

        out1 = self.model['inception_4a/1x1'](h)
        out3 = self.model[
            'inception_4a/3x3'](F.relu(self.model['inception_4a/3x3_reduce'](h)))
        out5 = self.model[
            'inception_4a/5x5'](F.relu(self.model['inception_4a/5x5_reduce'](h)))
        pool = self.model[
            'inception_4a/pool_proj'](self.pool_func(h, 3, stride=1, pad=1))
        y5 = F.concat((out1, out3, out5, pool), axis=1)
        h = F.relu(y5)

        out1 = self.model['inception_4b/1x1'](h)
        out3 = self.model[
            'inception_4b/3x3'](F.relu(self.model['inception_4b/3x3_reduce'](h)))
        out5 = self.model[
            'inception_4b/5x5'](F.relu(self.model['inception_4b/5x5_reduce'](h)))
        pool = self.model[
            'inception_4b/pool_proj'](self.pool_func(h, 3, stride=1, pad=1))
        y6 = F.concat((out1, out3, out5, pool), axis=1)
        h = F.relu(y6)

        return [y1, y2, y3, y4, y5, y6]
Example #25
def zoom_x2(batch):
    shape = batch.data.shape
    channel_shape = shape[0:-2]
    height, width = shape[-2:]
 
    volume = reduce(operator.mul,shape,1)
 
    b1 = F.reshape(batch,(volume,1))
    b2 = F.concat([b1,b1],1)
 
    b3 = F.reshape(b2,(volume//width,2*width))
    b4 = F.concat([b3,b3],1)
 
    return F.reshape(b4, channel_shape + (2*height ,) + (2*width ,))
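zoom_x2 above implements 2x nearest-neighbor upsampling with nothing but F.concat and F.reshape: each pixel is duplicated along the width by interleaving, and each row is duplicated by concatenating the reshaped rows with themselves. A small usage sketch (the function itself also needs operator and functools.reduce in scope):

import operator
from functools import reduce
import numpy as np
from chainer import Variable

x = Variable(np.arange(4, dtype=np.float32).reshape(1, 1, 2, 2))
y = zoom_x2(x)
print(y.shape)  # (1, 1, 4, 4); every input pixel becomes a 2x2 block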
Example #26
 def fit_partial(self, rsty_ids, raut_ids, rwrd_ids, window=5):
     doc_idx, usr_idx, wrd_idx = move(self.xp, rsty_ids, raut_ids, rwrd_ids)
     pivot = self.embed(next(move(self.xp, rwrd_ids[window: -window])))
     sty_at_pivot = rsty_ids[window: -window]
     aut_at_pivot = raut_ids[window: -window]
     sty = self.mixture_stories(next(move(self.xp, sty_at_pivot)))
     aut = self.mixture_authors(next(move(self.xp, aut_at_pivot)))
     start, end = window, rwrd_ids.shape[0] - window
     context = (F.dropout(sty, self.dropout_ratio) +
                F.dropout(aut, self.dropout_ratio) +
                F.dropout(pivot, self.dropout_ratio))
     n_frame = 2 * window
     # Precompute all neg samples since they're indep of frame
     size = context.data.shape[0]
     samples = self.sampler.sampler.sample((self.n_samples * n_frame, size))
     samples = chainer.cuda.cupy.split(samples.ravel(), n_frame)
     sources = []
     targets = []
     weights = []
     for frame in range(-window, window + 1):
         # Predict word given context and pivot word
         # The target starts before the pivot
         # Skip predicting the current pivot
         if frame == 0:
             continue
         # Here we're creating a weight mask. We don't want to
         # predict tokens that are outside this document or user
         # scope.
         wrd_at_target = rwrd_ids[start + frame: end + frame]
         sty_at_target = rsty_ids[start + frame: end + frame]
         aut_at_target = raut_ids[start + frame: end + frame]
         sty_is_same = sty_at_target == sty_at_pivot
         usr_is_same = aut_at_target == aut_at_pivot
         is_same = sty_is_same & usr_is_same
         weight, = move(self.xp, is_same.astype('float32'))
         target, = move(self.xp, wrd_at_target)
         sources.append(context)
         targets.append(target)
         weights.append(weight)
         sample, = move(self.xp, samples.pop())
         targets.append(sample)
         for _ in range(self.n_samples):
             # Note that the context is now negative
             sources.append(-context)
             weights.append(weight)
     sources = F.concat(sources, axis=0)
     targets = F.concat(targets, axis=0)
     weights = F.concat(weights, axis=0)
     loss = self.loss(sources, targets, weights)
     return loss
Example #27
    def __call__(self, X):

        h1 = F.leaky_relu(self.norm1(self.conv1(X)))
        h2 = F.leaky_relu(self.norm2(self.conv2(h1)))
        h3 = F.leaky_relu(self.norm3(self.conv3(h2)))
        h4 = F.leaky_relu(self.norm4(self.conv4(h3)))
        h5 = F.leaky_relu(self.norm5(self.conv5(h4)))
        h6 = F.leaky_relu(self.norm6(self.conv6(h5)))
        dh = F.relu(F.dropout(self.denorm1(self.deconv1(h6))))
        dh = F.relu(F.dropout(self.denorm2(self.deconv2(F.concat((dh, h5))))))
        dh = F.relu(F.dropout(self.denorm3(self.deconv3(F.concat((dh, h4))))))
        dh = F.relu(self.denorm4(self.deconv4(F.concat((dh, h3)))))
        dh = F.relu(self.denorm5(self.deconv5(F.concat((dh, h2)))))
        dh = F.sigmoid(self.deconv6(F.concat((dh, h1))))
        return dh
Example #28
def shake_camera(img):
    s0,s1,s2,s3 = img.data.shape
    zerobar = Variable(xp.zeros((s0,s1,4,s3),dtype=np.float32))
    img = F.concat([zerobar, img, zerobar],axis=2)
    randshift=np.random.randint(1,8)
    img = F.split_axis(img, [randshift,randshift+img_w],axis=2)[1]

    zerobar = Variable(xp.zeros((s0,s1,s2,4,1),dtype=np.float32))
    img = F.reshape(img,(s0,s1,s2,s3,1))
    img = F.concat([zerobar, img, zerobar],axis=3)
    randshift=np.random.randint(1,8)
    img = F.split_axis(img, [randshift,randshift+img_w],axis=3)[1]
    img = F.reshape(img,(s0,s1,s2,s3))
     
    return img
Example #29
    def forward(self, atom_x, pair_x, atom_only=False):
        a0 = self.atom_to_atom.forward(atom_x)
        a1 = self.pair_to_atom.forward(pair_x)
        a = functions.concat([a0, a1], axis=2)
        next_atom = self.atom_layer.forward(a)
        next_atom = functions.relu(next_atom)
        if atom_only:
            return next_atom

        p0 = self.atom_to_pair.forward(atom_x)
        p1 = self.pair_to_pair.forward(pair_x)
        p = functions.concat([p0, p1], axis=2)
        next_pair = self.pair_layer.forward(p)
        next_pair = functions.relu(next_pair)
        return next_atom, next_pair
Example #30
    def __call__(self, atom_array, adj, super_node, is_real_node=None):
        """
        Describe a layer

        Args:
            atom_array (numpy.ndarray): minibatch-by-node numpy.ndarray,
                minibatch of molecules represented with atom IDs (C, O, S, ...);
                atom_array[m, i] = a means that the m-th molecule's i-th node
                has atomic number a
            adj (numpy.ndarray): minibatch-by-relation-type-by-node-by-node numpy.ndarray,
                minibatch of multi-relational adjacency matrices with edge-type information;
                adj[m, c, i, j] = b means that the m-th molecule's edge of relation
                type c from node i to node j has value b
            super_node (numpy.ndarray): 1D array, the supernode hidden state
            is_real_node:

        Returns:
            numpy.ndarray: final molecule representation
        """
        if atom_array.dtype == self.xp.int32:
            h = self.embed(atom_array)  # (minibatch, max_num_atoms)
        else:
            h = atom_array
        # end if-else
        h0 = functions.copy(h, cuda.get_device_from_array(h.data).id)

        self.gwm.GRU_local.reset_state()
        self.gwm.GRU_super.reset_state()

        # embed the super node
        h_s = self.embed_super(super_node)

        g_list = []
        for step in range(self.n_message_layers):
            message_layer_index = 0 if self.weight_tying else step
            h2 = self.update_layers[message_layer_index](h, adj)
            h, h_s = self.gwm(h, h2, h_s, message_layer_index)
            if self.concat_hidden:
                g = self.readout_layers[step](h, h0, is_real_node)
                g_list.append(g)

        if self.concat_hidden:
            return functions.concat(g_list, axis=1)
        else:
            g = self.readout_layers[0](h, h0, is_real_node)
            g2 = functions.concat( (g, h_s), axis=1 )
            out_g = functions.relu(self.linear_for_concat_super(g2))
            return out_g
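Following the docstring above, a hedged sketch of what a toy minibatch for this layer could look like; the number of atoms, relation types, and the atom IDs below are illustrative only:

import numpy as np

# Two molecules, up to 3 atoms each, 2 relation (edge) types.
atom_array = np.array([[6, 8, 0],
                       [7, 6, 6]], dtype=np.int32)  # (minibatch, max_num_atoms), atom IDs
adj = np.zeros((2, 2, 3, 3), dtype=np.float32)      # (minibatch, relation_types, nodes, nodes)
adj[0, 0, 0, 1] = adj[0, 0, 1, 0] = 1.0             # molecule 0: an edge of type 0 between atoms 0 and 1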
Example #31
        channel_input = get_normalized_image_variable(t, w)
        if channel_input is None:
            no_image = True
            continue
        channel_inputs.append(channel_input)

        channel_observed = get_normalized_image_variable(t + dt, w)
        if channel_observed is None:
            no_image = True
            continue
        channel_observeds.append(channel_observed)

    if no_image:
        continue

    img_input = F.concat(channel_inputs)
    img_observed = F.concat(channel_observeds)

    img_predicted = predictor(img_input)

    loss = F.sum(abs(img_predicted - img_observed))
    predictor.cleargrads()
    loss.backward()
    optimizer_p.update()
    """
    Train the generator and discriminator
    """
    t2 = t
    no_missing_image = True
    img_forecast = img_input
    if epoch >= start_dcgan_at_epoch:
Example #32
 def __call__(self, x, encoded):
     h = F.unpooling_2d(x, ksize=2, outsize=encoded.shape[2:])
     h = F.concat([h, encoded], axis=1)
     h = super(UpBlock, self).__call__(h)
     return h
Example #33
def norm_embed(bn, xs):
    x_len = [len(x.data) for x in xs]
    x_section = np.cumsum(x_len[:-1])
    ex = bn(F.concat(xs, axis=0))
    exs = F.split_axis(ex, x_section, 0, force_tuple=True)
    return exs
Example #34
 def __call__(self, x1, x2):
     return F.concat((x1, x2))
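Example #34 is just a thin wrapper around F.concat, whose default axis is 1. A small sketch of the underlying behaviour:

import numpy as np
import chainer.functions as F

a = np.zeros((2, 3), dtype=np.float32)
b = np.ones((2, 5), dtype=np.float32)
print(F.concat((a, b)).shape)          # (2, 8)  -> joined along axis=1, the default
print(F.concat((a, a), axis=0).shape)  # (4, 3)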
Example #35
        y1 = second_net(vec1)

        vec2 = [np.array([ord(char) - 96], dtype=np.int32) for char in word2]
        y2 = second_net2(vec2)


        loss = third_net(y1,y2,glove_vec)
        loss_record.append(float(loss.data))
        loss.backward(retain_grad=True)


        optimizer4.update()
        optimizer3.update()
        optimizer2.update()

        f1_loss = F.concat([second_net.x.grad,second_net2.x.grad])
        f1_loss = F.sum(F.absolute(f1_loss))
        f1_record.append(float(f1_loss.data))
        f1.grad = (f1 * f1_loss.data).data
        #f1.grad = (f1 * loss.data).data
        f1.unchain_backward()
        f1.backward(retain_grad=True)
        optimizer1.update()

    plt.plot(loss_record)
    plt.show()
    plt.plot(f1_record)
    plt.show()

    print(original_word,'is :')
    print(word1,'+',word2)
Example #36
    def __call__(self, x, x_mask):
        h_dict = {}
        mask_dict = {}

        #print("Encode stage")
        #print("[new step]: input -> PConv_00")
        #print("input shape:",x.shape)
        #print("mask shape:",x_mask.shape)
        h_dict['PConv_00'], mask_dict['PConv_00'] = self.enc_layers[
            'PConv_00'](x, x_mask)
        key_prev = 'PConv_00'
        #print("PConv_00 sum: ",self.xp.sum(h_dict['PConv_00'].data))
        for i in range(1, self.layer_size):
            key = 'PConv_0' + str(i)
            #print("[new step]: ",key_prev," -> ",key)
            #print("input shape:",h_dict[key_prev].shape)
            #print("mask shape:",mask_dict[key_prev].shape)
            h_dict[key], mask_dict[key] = self.enc_layers[key](
                h_dict[key_prev], mask_dict[key_prev])
            key_prev = key
            #print(key," sum: ",self.xp.sum(h_dict[key].data))

        #print("Decode stage")
        #key_prev should be PConv06
        for i in reversed(range(self.layer_size - 1)):
            enc_in_key = 'PConv_0' + str(i)
            dec_out_key = "PConv_1" + str(i + 1)
            #print("[new step]:")
            #print("h_dict['",enc_in_key,"'] ---l")
            #print("h_dict['",key_prev,"'] --- h_dict['",dec_out_key,"']")
            #print("input enc shape:",h_dict[enc_in_key].shape)

            # unpooling (the original paper used upsampling)
            h = F.unpooling_2d(h_dict[key_prev], 2, 2, 0, cover_all=False)
            mask = F.unpooling_2d(mask_dict[key_prev],
                                  2,
                                  2,
                                  0,
                                  cover_all=False)
            #print("unpooled input dec shape:",h.shape)
            #print("unpooled input mask shape:",mask.shape)

            h = F.concat([h_dict[enc_in_key], h], axis=1)
            mask = F.concat([mask_dict[enc_in_key], mask], axis=1)
            h_dict[dec_out_key], mask_dict[dec_out_key] = self.dec_layers[
                dec_out_key](h, mask)
            key_prev = dec_out_key
            #print(dec_out_key," sum: ",self.xp.sum(h_dict[dec_out_key].data))
        #last step
        dec_out_key = "PConv_10"
        #print("[new step]:")
        #print("                input ---l")
        #print("h_dict['",key_prev,"'] --- h_dict['PConv_10']")
        #print("input shape:",x.shape)

        # unpooling (the original paper used upsampling)
        h = F.unpooling_2d(h_dict[key_prev], 2, 2, 0, cover_all=False)
        mask = F.unpooling_2d(mask_dict[key_prev], 2, 2, 0, cover_all=False)
        #print("unpooled input dec shape:",h.shape)
        #print("unpooled input mask shape:",mask.shape)

        h = F.concat([x, h], axis=1)
        mask = F.concat([x_mask, mask], axis=1)
        h_dict[dec_out_key], mask_dict[dec_out_key] = self.dec_layers[
            dec_out_key](h, mask)
        #print(dec_out_key," sum: ",self.xp.sum(h_dict[dec_out_key].data))

        return h_dict[dec_out_key]
Example #37
def train():
    # model
    gen = Generator()
    dis = Discriminator()
    gan = chainer.Sequential(gen, dis)

    if GPU >= 0:
        chainer.cuda.get_device(GPU).use()
        gen.to_gpu()
        dis.to_gpu()
        gan.to_gpu()

    opt_d = chainer.optimizers.Adam(0.0002, beta1=0.5)
    opt_d.setup(dis)
    opt_g = chainer.optimizers.Adam(0.0002, beta1=0.5)
    opt_g.setup(gen)

    xs, paths = data_load('../Dataset/train/images/', hf=True, vf=True, rot=1)

    # training
    mb = 64
    mbi = 0
    train_ind = np.arange(len(xs))
    np.random.seed(0)
    np.random.shuffle(train_ind)

    for ite in range(5000):
        if mbi + mb > len(xs):
            mb_ind = train_ind[mbi:]
            np.random.shuffle(train_ind)
            mb_ind = np.hstack((mb_ind, train_ind[:(mb - (len(xs) - mbi))]))
            mbi = mb - (len(xs) - mbi)
        else:
            mb_ind = train_ind[mbi:mbi + mb]
            mbi += mb

        gen.cleargrads()
        dis.cleargrads()
        gan.cleargrads()

        x = xs[mb_ind]
        input_noise = np.random.uniform(-1, 1, size=(mb, 100, 1,
                                                     1)).astype(np.float32)
        dt = np.array([1] * mb + [0] * mb, dtype=np.int32).reshape([mb * 2, 1])
        gt = np.array([1] * mb, dtype=np.int32).reshape([mb, 1])

        if GPU >= 0:
            x = chainer.cuda.to_gpu(x)
            input_noise = chainer.cuda.to_gpu(input_noise)
            dt = chainer.cuda.to_gpu(dt)
            gt = chainer.cuda.to_gpu(gt)

        g_output = gen(input_noise)

        #if GPU >= 0:
        #    g_output = chainer.cuda.to_cpu(g_output)

        X = F.concat((x, g_output), axis=0)
        y = dis(X)

        loss_d = F.sigmoid_cross_entropy(y, dt)
        loss_d.backward()
        opt_d.update()

        y = gan(input_noise)

        loss_g = F.sigmoid_cross_entropy(y, gt)
        loss_g.backward()
        opt_g.update()

        loss_d = loss_d.data
        loss_g = loss_g.data

        if GPU >= 0:
            loss_d = chainer.cuda.to_cpu(loss_d)
            loss_g = chainer.cuda.to_cpu(loss_g)

        if ite % 500 == 0:
            print("iter >>", ite + 1, ',G:loss >>', loss_g.item(),
                  ', D:loss >>', loss_d.item())

    chainer.serializers.save_npz('cnn.npz', gen)
Example #38
        rnd = np.random.randint(list_len)
        dir_path = image_path + image_list[rnd]
        for index in range(4, 12):
            inp = dir_path + "/" + str(0) + ".png"
            inp = prepare_dataset(inp)
            input_box.append(inp)
            img = dir_path + "/" + str(index) + ".png"
            img = prepare_dataset(img)
            frame_box.append(img)

        x = chainer.as_variable(xp.array(input_box).astype(xp.float32))
        t = chainer.as_variable(xp.array(frame_box).astype(xp.float32))
        embed = feature_extractor(t) - feature_extractor(x)
        c = feature_embed(embed)

        z = F.concat([x, c], axis=1)
        y = predictor(z)
        y_dis = discriminator_content(y)
        t_dis = discriminator_content(t)
        dis_loss = F.mean(F.softplus(-t_dis)) + F.mean(F.softplus(y_dis))

        c_g = feature_extractor(y) - feature_extractor(make_diff(y))
        c_dis = discriminator_sequence(embed)
        c_g_dis = discriminator_sequence(c_g)
        dis_loss += F.mean(F.softplus(-c_dis)) + F.mean(F.softplus(c_g_dis))

        c_g.unchain_backward()

        discriminator_content.cleargrads()
        discriminator_sequence.cleargrads()
        dis_loss.backward()
Example #39
        def compute_ctxt(previous_state):
            ci1, attn1 = compute_ctxt1(previous_state)
            intermediate_state = F.concat((previous_state, ci1), axis=1)
            ci2, attn2 = compute_ctxt2(intermediate_state)

            return ci2, attn2
Example #40
 def __call__(self, h, o):
     left, right = F.split_axis(self.fch(h) + o, 2, axis=1)
     g = F.concat((F.relu(left), right), axis=1)
     #g = F.leaky_relu(self.fch(h) + o, slope=0.5)
     q = self.fcg(g)
     return q
Example #41
    def __call__(self, x_data, lengths=None, d=None, first_step=False):
        batchsize = len(x_data)
        h_shape = (self.n_layers, batchsize, self.hidden_dim)
        hx = None
        cx = None

        x_data = self.xp.concatenate(x_data, axis=0)
        xs = self.word_embed(x_data)
        # dropout
        xs = F.dropout(xs, ratio=self.use_dropout)

        adv_flag = self.train and (self.use_adv or self.args.use_semi_data)

        if adv_flag:

            def norm_vec_sentence_level(d,
                                        nn_flag=False,
                                        include_norm_term=False):
                dim = d.shape[1]
                d_list = F.split_axis(d, np.cumsum(lengths)[:-1], axis=0)
                max_length = np.max(lengths)
                d_pad = F.pad_sequence(d_list, length=max_length, padding=0.0)
                d_flat = F.reshape(get_normalized_vector(d_pad, None),
                                   (-1, dim))
                split_size = np.cumsum(np.full(batchsize, max_length))[:-1]
                d_list = F.split_axis(d_flat, split_size, axis=0)
                d_list = [_d[:_length] for _d, _length in zip(d_list, lengths)]
                d = F.concat(d_list, axis=0)
                return d

            if first_step:
                if self.args.use_semi_data:
                    # Vat
                    d = self.xp.random.normal(size=xs.shape, dtype='f')
                else:
                    # Adv
                    d = self.xp.zeros(xs.shape, dtype='f')

                # Normalize at word-level
                d = get_normalized_vector(d, self.xp)

                d_var = Variable(d.astype(self.xp.float32))
                self.d_var = d_var
                xs = xs + self.args.xi_var_first * d_var

            elif d is not None:
                d_original = d.data if isinstance(d, Variable) else d
                if self.args.norm_sentence_level:
                    # Normalize at sentence-level
                    d_variable = norm_vec_sentence_level(
                        d, include_norm_term=True)
                    d = d_variable.data
                else:
                    # Normalize at word-level
                    d = get_normalized_vector(d_original, self.xp)

                xs_noise_final = self.xi_var * d
                xs = xs + xs_noise_final

        split_size = np.cumsum(lengths)[:-1]
        xs_f = F.split_axis(xs, split_size, axis=0)

        hy_f, cy_f, ys_list = self.uni_lstm(hx=hx, cx=cx, xs=xs_f)

        hy = [_h[-1] for _h in ys_list]
        hy = F.concat(hy, axis=0)
        hy = F.reshape(hy, (batchsize, -1))
        self.hy = hy

        output = self.output_mlp(hy)
        return output
Example #42
 def zero_pads(self, x, pad, where):
     sizes = list(x.data.shape)
     sizes[where] = pad
     pad_mat = chainer.Variable(chainer.cuda.to_gpu(self.xp.zeros(sizes, dtype=np.float32), device=chainer.cuda.get_device_from_array(x.data)))
     return F.concat((pad_mat, x), axis=where)
Example #43
def forward_one(x, target,model,  label, hidden, prev_c, train_flag):

    # make input window vector
    distance =  window // 2
    s_num = 3-1 + window // 2
    char_vecs = list()
    char_type_vecs = list()
    words = list()
    for wp in x:
        w = wp.split('/')[0]
        words.append(w)

    c_index_r = 0
    for i in range(target+1):
        c_index_r += len(words[i])
    c_index_l = c_index_r - len(words[target])

    x = words
    target_word = words[i]
    c = list(''.join(words))

    x = c[:c_index_l] + c[c_index_r:]
    for i in range(s_num):
        x.append('</s>')
        x.insert(0,'<s>')
    for i in range(-distance, distance+1):
    # make char vector 
        # import char
        uni_gram = x[target+s_num+i]
        bi_gram = x[target+s_num-1+i] + x[target+s_num+i]
        tri_gram = x[target+s_num-2+i] + x[target+s_num-1+i] + x[target+s_num+i]
        # char2id
        uni_gram_id = char2id[uni_gram]
        bi_gram_id = char2id[bi_gram]
        tri_gram_id = char2id[tri_gram]
        # id 2 embedding
        uni_gram_vec = model.embed(get_onehot(uni_gram_id))
        bi_gram_vec = model.embed(get_onehot(bi_gram_id))
        tri_gram_vec = model.embed(get_onehot(tri_gram_id))
        # add all char_vec
        char_vecs.append(uni_gram_vec)
        char_vecs.append(bi_gram_vec)
        char_vecs.append(tri_gram_vec)
    # make char type vector 
        # import char type
        uni_gram_type = make_char_type(uni_gram)
        bi_gram_type = make_char_type(x[target+s_num-1+i]) + make_char_type(x[target+s_num+i])
        tri_gram_type = make_char_type(x[target+s_num-2+i]) + make_char_type(x[target+s_num-1+i]) + make_char_type(x[target+s_num+i])
        # chartype 2 id
        uni_gram_type_id = char_type2id[uni_gram_type]
        bi_gram_type_id =  char_type2id[bi_gram_type]
        tri_gram_type_id = char_type2id[tri_gram_type]
        # id 2 embedding
        uni_gram_type_vec = model.char_type_embed(get_onehot(uni_gram_type_id))
        bi_gram_type_vec = model.char_type_embed(get_onehot(bi_gram_type_id))
        tri_gram_type_vec = model.char_type_embed(get_onehot(tri_gram_type_id))
        # add all char_type_vec
        char_type_vecs.append(uni_gram_type_vec)
        char_type_vecs.append(bi_gram_type_vec)
        char_type_vecs.append(tri_gram_type_vec)
    
    # word feature
    target_word_id = char2id['word:'+target_word]
    word_vec = model.embed(get_onehot(target_word_id))
    ct_word = list()
    for c in target_word:
        ct_word.append(make_char_type(c))
    try:
        ct_index = char_type2id[''.join(list(set(ct_word)))]
    except:
        ct_index = char_type2id['other']
    one_hot = list()
    one_hot = [0]*len(char_type2id)
    one_hot[ct_index] = 1
    #for i in range(len(char_type2id)):
    #    if i == ct_index:
    #        one_hot.append(1)
    #    else:
    #        one_hot.append(0)
    
    pos_d = list()
    for i in range(23):
        if target_word+'/'+id2pos[i] in word_pos:
            pos_d.append(1)
        else:
            pos_d.append(0)
    #pos_d = [0]*23
    
    ct_word_vec = chainer.Variable(np.array([one_hot], dtype=np.float32))
    pos_d_vec = chainer.Variable(np.array([pos_d], dtype=np.float32))
    
    char_concat = F.concat(tuple(char_vecs))
    char_type_concat = F.concat(tuple(char_type_vecs))
    #dropout_concat = F.dropout(concat, ratio=dropout_rate, train=train_flag)
    concat = F.concat((char_concat, char_type_concat, word_vec))
    concat = F.concat((concat, hidden))
    i_gate = F.sigmoid(model.i_gate(concat))
    f_gate = F.sigmoid(model.f_gate(concat))
    o_gate = F.sigmoid(model.o_gate(concat))
    concat = F.concat((hidden, i_gate, f_gate, o_gate))
    prev_c, hidden = F.lstm(prev_c, concat)
    output = model.output(F.concat((hidden, ct_word_vec, pos_d_vec)))
    dist = F.softmax(output)
    #print(dist.data, label, np.argmax(dist.data))
    #print(output.dataect.data)
    return np.argmax(dist.data)
Example #44
    def __call__(self, hs, ys):
        """Core function of Decoder layer.

        Args:
            hs (list of chainer.Variable | N-dimension array): Input variable from encoder.
            ys (list of chainer.Variable | N-dimension array): Input variable of decoder.

        Returns:
            chainer.Variable: A variable holding a scalar array of the training loss.
            chainer.Variable: A variable holding a scalar array of the accuracy.

        """
        self.loss = None
        # prepare input and output word sequences with sos/eos IDs
        eos = self.xp.array([self.eos], 'i')
        sos = self.xp.array([self.sos], 'i')
        ys_in = [F.concat([sos, y], axis=0) for y in ys]
        ys_out = [F.concat([y, eos], axis=0) for y in ys]

        # padding for ys with -1
        # pys: utt x olen
        pad_ys_in = F.pad_sequence(ys_in, padding=self.eos)
        pad_ys_out = F.pad_sequence(ys_out, padding=-1)

        # get dim, length info
        batch = pad_ys_out.shape[0]
        olength = pad_ys_out.shape[1]
        logging.info(self.__class__.__name__ + ' input lengths:  ' +
                     str(self.xp.array([h.shape[0] for h in hs])))
        logging.info(self.__class__.__name__ + ' output lengths: ' +
                     str(self.xp.array([y.shape[0] for y in ys_out])))

        # initialization
        c_list = [None]  # list of cell state of each layer
        z_list = [None]  # list of hidden state of each layer
        for _ in six.moves.range(1, self.dlayers):
            c_list.append(None)
            z_list.append(None)
        att_w = None
        z_all = []
        self.att.reset()  # reset pre-computation of h

        # pre-computation of embedding
        eys = self.embed(pad_ys_in)  # utt x olen x zdim
        eys = F.separate(eys, axis=1)

        # loop for an output sequence
        for i in six.moves.range(olength):
            att_c, att_w = self.att(hs, z_list[0], att_w)
            if i > 0 and random.random() < self.sampling_probability:
                logging.info(' scheduled sampling ')
                z_out = self.output(z_all[-1])
                z_out = F.argmax(F.log_softmax(z_out), axis=1)
                z_out = self.embed(z_out)
                ey = F.hstack((z_out, att_c))  # utt x (zdim + hdim)
            else:
                ey = F.hstack((eys[i], att_c))  # utt x (zdim + hdim)
            z_list, c_list = self.rnn_forward(ey, z_list, c_list, z_list,
                                              c_list)
            z_all.append(z_list[-1])

        z_all = F.reshape(F.stack(z_all, axis=1),
                          (batch * olength, self.dunits))
        # compute loss
        y_all = self.output(z_all)
        self.loss = F.softmax_cross_entropy(y_all, F.flatten(pad_ys_out))
        # -1: eos, which is removed in the loss computation
        self.loss *= (np.mean([len(x) for x in ys_in]) - 1)
        acc = F.accuracy(y_all, F.flatten(pad_ys_out), ignore_label=-1)
        logging.info('att loss:' + str(self.loss.data))

        # show predicted character sequence for debug
        if self.verbose > 0 and self.char_list is not None:
            y_hat = F.reshape(y_all, (batch, olength, -1))
            y_true = pad_ys_out
            for (i, y_hat_), y_true_ in zip(enumerate(y_hat.data),
                                            y_true.data):
                if i == MAX_DECODER_OUTPUT:
                    break
                idx_hat = self.xp.argmax(y_hat_[y_true_ != -1], axis=1)
                idx_true = y_true_[y_true_ != -1]
                seq_hat = [self.char_list[int(idx)] for idx in idx_hat]
                seq_true = [self.char_list[int(idx)] for idx in idx_true]
                seq_hat = "".join(seq_hat).replace('<space>', ' ')
                seq_true = "".join(seq_true).replace('<space>', ' ')
                logging.info("groundtruth[%d]: " % i + seq_true)
                logging.info("prediction [%d]: " % i + seq_hat)

        if self.labeldist is not None:
            if self.vlabeldist is None:
                self.vlabeldist = chainer.Variable(
                    self.xp.asarray(self.labeldist))
            loss_reg = -F.sum(
                F.scale(F.log_softmax(y_all), self.vlabeldist,
                        axis=1)) / len(ys_in)
            self.loss = (
                1. - self.lsm_weight) * self.loss + self.lsm_weight * loss_reg

        return self.loss, acc
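A minimal sketch of the sos/eos bookkeeping and padding used above, with made-up token IDs (0 for sos, 1 for eos); it only illustrates how F.concat and F.pad_sequence shape the decoder inputs and targets:

import numpy as np
import chainer.functions as F

sos = np.array([0], dtype=np.int32)   # hypothetical <sos> id
eos = np.array([1], dtype=np.int32)   # hypothetical <eos> id
ys = [np.array([5, 7], dtype=np.int32), np.array([3], dtype=np.int32)]

ys_in = [F.concat([sos, y], axis=0) for y in ys]   # decoder inputs start with <sos>
ys_out = [F.concat([y, eos], axis=0) for y in ys]  # targets end with <eos>

pad_ys_in = F.pad_sequence(ys_in, padding=1)       # pad with the <eos> id
pad_ys_out = F.pad_sequence(ys_out, padding=-1)    # -1 is ignored by softmax_cross_entropy
print(pad_ys_in.shape, pad_ys_out.shape)           # (2, 3) (2, 3)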
def forward_one(x, target, label, word_dict, train_flag):
    # make dict feature vector
    dict_vec = list()
    L1 = L2 = L3 = L4 = R1 = R2 = R3 = R4 = I1 = I2 = I3 = I4 = 0
    for i in range(len(x[:target])):
        word_candidate = x[target-(i+1):target]
        if word_candidate in word_dict:
            if len(word_candidate) == 1:
                L1 = 1
            elif len(word_candidate) == 2:
                L2 = 1
            elif len(word_candidate) == 3:
                L3 = 1
            else:
                L4 = 1
        if i == 10:
            break

    for i in range(len(x[target:])):
        word_candidate = x[target:target+i+1]
        if word_candidate in word_dict:
            if len(word_candidate) == 1:
                R1 = 1
            elif len(word_candidate) == 2:
                R2 = 1
            elif len(word_candidate) == 3:
                R3 = 1
            else:
                R4 = 1
        if i == 10:
            break
    
    for i in range(1, 6, 1):
        for j in range(1, 6, 1):
            word_candidate = x[target-i:target+j]
            if word_candidate in word_dict:
                if len(word_candidate) == 1:
                    I1 = 1
                elif len(word_candidate) == 2:
                    I2 = 1
                elif len(word_candidate) == 3:
                    I3 = 1
                else:
                    I4 = 1
    dict_vec = chainer.Variable(np.array([[L1,L2,L3,L4,R1,R2,R3,R4,I1,I2,I3,I4]], dtype=np.float32))
    # dict_embed_vec = model.dict_embed(dict_vec)
    # make input window vector
    distance = window // 2
    s_num = 3 - 1 + window // 2
    char_vecs = list()
    char_type_vecs = list()
    x = list(x)
    for i in range(s_num):
        x.append('</s>')
        x.insert(0,'<s>')
    for i in range(-distance, distance+1):
        # make char vector
        # import char
        uni_gram = x[target+s_num+i]
        bi_gram = x[target+s_num-1+i] + x[target+s_num+i]
        tri_gram = x[target+s_num-2+i] + x[target+s_num-1+i] + x[target+s_num+i]
        # char2id
        uni_gram_id = char2id[uni_gram]
        bi_gram_id = char2id[bi_gram]
        tri_gram_id = char2id[tri_gram]
        # id 2 embedding
        uni_gram_vec = model.embed(get_onehot(uni_gram_id))
        bi_gram_vec = model.embed(get_onehot(bi_gram_id))
        tri_gram_vec = model.embed(get_onehot(tri_gram_id))
        # add all char_vec
        char_vecs.append(uni_gram_vec)
        char_vecs.append(bi_gram_vec)
        char_vecs.append(tri_gram_vec)
        # make char type vector
        # import char type
        uni_gram_type = make_char_type(uni_gram)
        bi_gram_type = make_char_type(x[target+s_num-1+i]) + make_char_type(x[target+s_num+i])
        tri_gram_type = make_char_type(x[target+s_num-2+i]) + make_char_type(x[target+s_num-1+i]) + make_char_type(x[target+s_num+i])
        # chartype 2 id
        uni_gram_type_id = char_type2id[uni_gram_type]
        bi_gram_type_id =  char_type2id[bi_gram_type]
        tri_gram_type_id = char_type2id[tri_gram_type]
        # id 2 embedding
        uni_gram_type_vec = model.char_type_embed(get_onehot(uni_gram_type_id))
        bi_gram_type_vec = model.char_type_embed(get_onehot(bi_gram_type_id))
        tri_gram_type_vec = model.char_type_embed(get_onehot(tri_gram_type_id))
        # add all char_type_vec
        char_type_vecs.append(uni_gram_type_vec)
        char_type_vecs.append(bi_gram_type_vec)
        char_type_vecs.append(tri_gram_type_vec)

    char_concat = F.concat(tuple(char_vecs))
    char_type_concat = F.concat(tuple(char_type_vecs))
    #dropout_concat = F.dropout(concat, ratio=dropout_rate, train=train_flag)
    concat = F.concat((char_concat, char_type_concat))
    hidden = model.hidden1(concat)
    hidden2 = F.concat((hidden, dict_vec))
    output = model.output(hidden2)
    dist = F.softmax(output)
    #print(dist.data, label, np.argmax(dist.data))
    correct = get_onehot(label)
    #print(output.data, correct.data)
    return np.argmax(dist.data), F.softmax_cross_entropy(output, correct)
Example #46
def posneg(x):
    n, c, h, w = x.shape
    y = F.concat((x, -x), axis=2)
    return y.reshape((n, c * 2, h, w))
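A quick usage check of the posneg helper above (a sketch, assuming the definition above and the usual import chainer.functions as F; the input shape is made up). Because the concatenation is along the height axis followed by a reshape, each input channel ends up immediately followed by its negation:

import numpy as np

x = np.arange(2 * 3 * 4 * 5, dtype=np.float32).reshape(2, 3, 4, 5)
y = posneg(x)   # uses the posneg defined above
print(y.shape)  # (2, 6, 4, 5): channel 2*i is x[:, i], channel 2*i + 1 is -x[:, i]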
Example #47
 def __call__(self, x, **kwargs):
     out = []
     for name in self.layer_names:
         out.append(self[name](x, **kwargs))
     out = F.concat(tuple(out), axis=self.axis)
     return out
Example #48
    def __call__(self, x, conf, loc, conf_mask, loc_mask):
        h = F.relu(self.conv1_1(x))
        h = F.max_pooling_2d(F.relu(self.conv1_2(h)), 2, 2)
        h = F.relu(self.conv2_1(h))
        h = F.max_pooling_2d(F.relu(self.conv2_2(h)), 2, 2)
        h = F.relu(self.conv3_1(h))
        h = F.relu(self.conv3_2(h))
        h = F.max_pooling_2d(F.relu(self.conv3_3(h)), 2, 2)
        h = F.relu(self.conv4_1(h))
        h = F.relu(self.conv4_2(h))
        h = F.relu(self.conv4_3(h))

        self.h_conv4_3 = h

        h = F.max_pooling_2d(h, 2, 2)
        h = F.relu(self.conv5_1(h))
        h = F.relu(self.conv5_2(h))
        h = F.max_pooling_2d(F.relu(self.conv5_3(h)), 3, stride=1, pad=1)

        h = F.relu(self.fc6(h))
        h = F.relu(self.fc7(h))
        self.h_fc7 = h

        h = F.relu(self.conv6_1(h))
        h = F.relu(self.conv6_2(h))
        self.h_conv6_2 = h

        h = F.relu(self.conv7_1(h))
        h = F.relu(self.conv7_2(h))
        self.h_conv7_2 = h

        h = F.relu(self.conv8_1(h))
        h = F.relu(self.conv8_2(h))
        self.h_conv8_2 = h

        h = F.average_pooling_2d(h, 3)
        self.h_pool6 = h

        batchsize, ch, hh, ww = self.h_conv4_3.shape
        kari = F.reshape(self.h_conv4_3, (batchsize * ch, hh * ww))
        kari = F.transpose(kari, (1, 0))
        kari = F.normalize(kari)
        kari = F.transpose(kari, (1, 0))
        kari = F.reshape(kari, (batchsize, ch, hh, ww))

        self.h_conv4_3_norm = self.normalize(kari)
        self.h_conv4_3_norm_mbox_loc = self.conv4_3_norm_mbox_loc(
            self.h_conv4_3_norm)
        self.h_conv4_3_norm_mbox_conf = self.conv4_3_norm_mbox_conf(
            self.h_conv4_3_norm)
        self.h_conv4_3_norm_mbox_loc_perm = F.transpose(
            self.h_conv4_3_norm_mbox_loc, (0, 2, 3, 1))
        self.h_conv4_3_norm_mbox_conf_perm = F.transpose(
            self.h_conv4_3_norm_mbox_conf, (0, 2, 3, 1))

        self.h_conv4_3_norm_mbox_loc_flat = F.reshape(
            self.h_conv4_3_norm_mbox_loc_perm,
            (batchsize, self.c4_h, self.c4_w, self.c4_d, 4))
        self.h_conv4_3_norm_mbox_conf_flat = F.reshape(
            self.h_conv4_3_norm_mbox_conf_perm,
            (batchsize, self.c4_h, self.c4_w, self.c4_d, 21))

        self.h_fc7_mbox_loc = self.fc7_mbox_loc(self.h_fc7)
        self.h_fc7_mbox_conf = self.fc7_mbox_conf(self.h_fc7)
        self.h_fc7_mbox_loc_perm = F.transpose(self.h_fc7_mbox_loc,
                                               (0, 2, 3, 1))
        self.h_fc7_mbox_conf_perm = F.transpose(self.h_fc7_mbox_conf,
                                                (0, 2, 3, 1))
        self.h_fc7_mbox_loc_flat = F.reshape(
            self.h_fc7_mbox_loc_perm,
            (batchsize, self.f7_h, self.f7_w, self.f7_d, 4))
        self.h_fc7_mbox_conf_flat = F.reshape(
            self.h_fc7_mbox_conf_perm,
            (batchsize, self.f7_h, self.f7_w, self.f7_d, 21))

        self.h_conv6_2_mbox_loc = self.conv6_2_mbox_loc(self.h_conv6_2)
        self.h_conv6_2_mbox_conf = self.conv6_2_mbox_conf(self.h_conv6_2)
        self.h_conv6_2_mbox_loc_perm = F.transpose(self.h_conv6_2_mbox_loc,
                                                   (0, 2, 3, 1))
        self.h_conv6_2_mbox_conf_perm = F.transpose(self.h_conv6_2_mbox_conf,
                                                    (0, 2, 3, 1))
        self.h_conv6_2_mbox_loc_flat = F.reshape(
            self.h_conv6_2_mbox_loc_perm,
            (batchsize, self.c6_h, self.c6_w, self.c6_d, 4))
        self.h_conv6_2_mbox_conf_flat = F.reshape(
            self.h_conv6_2_mbox_conf_perm,
            (batchsize, self.c6_h, self.c6_w, self.c6_d, 21))

        self.h_conv7_2_mbox_loc = self.conv7_2_mbox_loc(self.h_conv7_2)
        self.h_conv7_2_mbox_conf = self.conv7_2_mbox_conf(self.h_conv7_2)
        self.h_conv7_2_mbox_loc_perm = F.transpose(self.h_conv7_2_mbox_loc,
                                                   (0, 2, 3, 1))
        self.h_conv7_2_mbox_conf_perm = F.transpose(self.h_conv7_2_mbox_conf,
                                                    (0, 2, 3, 1))
        self.h_conv7_2_mbox_loc_flat = F.reshape(
            self.h_conv7_2_mbox_loc_perm,
            (batchsize, self.c7_h, self.c7_w, self.c7_d, 4))
        self.h_conv7_2_mbox_conf_flat = F.reshape(
            self.h_conv7_2_mbox_conf_perm,
            (batchsize, self.c7_h, self.c7_w, self.c7_d, 21))

        self.h_conv8_2_mbox_loc = self.conv8_2_mbox_loc(self.h_conv8_2)
        self.h_conv8_2_mbox_conf = self.conv8_2_mbox_conf(self.h_conv8_2)
        self.h_conv8_2_mbox_loc_perm = F.transpose(self.h_conv8_2_mbox_loc,
                                                   (0, 2, 3, 1))
        self.h_conv8_2_mbox_conf_perm = F.transpose(self.h_conv8_2_mbox_conf,
                                                    (0, 2, 3, 1))
        self.h_conv8_2_mbox_loc_flat = F.reshape(
            self.h_conv8_2_mbox_loc_perm,
            (batchsize, self.c8_h, self.c8_w, self.c8_d, 4))
        self.h_conv8_2_mbox_conf_flat = F.reshape(
            self.h_conv8_2_mbox_conf_perm,
            (batchsize, self.c8_h, self.c8_w, self.c8_d, 21))

        self.h_pool6_mbox_loc = self.pool6_mbox_loc(self.h_pool6)
        self.h_pool6_mbox_conf = self.pool6_mbox_conf(self.h_pool6)
        self.h_pool6_mbox_loc_perm = F.transpose(self.h_pool6_mbox_loc,
                                                 (0, 2, 3, 1))
        self.h_pool6_mbox_conf_perm = F.transpose(self.h_pool6_mbox_conf,
                                                  (0, 2, 3, 1))
        self.h_pool6_mbox_loc_flat = F.reshape(
            self.h_pool6_mbox_loc_perm,
            (batchsize, self.p6_h, self.p6_w, self.p6_d, 4))
        self.h_pool6_mbox_conf_flat = F.reshape(
            self.h_pool6_mbox_conf_perm,
            (batchsize, self.p6_h, self.p6_w, self.p6_d, 21))

        self.mbox_loc = F.concat([
            F.reshape(self.h_conv4_3_norm_mbox_loc_flat, [batchsize, -1, 4]),
            F.reshape(self.h_fc7_mbox_loc_flat, [batchsize, -1, 4]),
            F.reshape(self.h_conv6_2_mbox_loc_flat, [batchsize, -1, 4]),
            F.reshape(self.h_conv7_2_mbox_loc_flat, [batchsize, -1, 4]),
            F.reshape(self.h_conv8_2_mbox_loc_flat, [batchsize, -1, 4]),
            F.reshape(self.h_pool6_mbox_loc_flat, [batchsize, -1, 4]),
        ],
                                 axis=1)

        self.mbox_conf = F.concat([
            F.reshape(self.h_conv4_3_norm_mbox_conf_flat, [batchsize, -1, 21]),
            F.reshape(self.h_fc7_mbox_conf_flat, [batchsize, -1, 21]),
            F.reshape(self.h_conv6_2_mbox_conf_flat, [batchsize, -1, 21]),
            F.reshape(self.h_conv7_2_mbox_conf_flat, [batchsize, -1, 21]),
            F.reshape(self.h_conv8_2_mbox_conf_flat, [batchsize, -1, 21]),
            F.reshape(self.h_pool6_mbox_conf_flat, [batchsize, -1, 21]),
        ],
                                  axis=1)

        self.mbox_conf_reshape = F.reshape(self.mbox_conf,
                                           (7308 * batchsize, 21))
        self.mbox_conf_softmax = F.softmax(self.mbox_conf_reshape)
        self.mbox_conf_softmax_reshape = F.reshape(self.mbox_conf_softmax,
                                                   (batchsize, 7308, 21))

        if self.train:
            mbox_conf = cuda.to_cpu(self.mbox_conf_softmax_reshape.data)
            dummy_label = np.zeros([batchsize, 7308, 21])
            for i in range(batchsize):
                self.conf_num = int(conf_mask[i].sum())
                self.mask = conf_mask[i].copy()
                negative_sample_num = int(
                    conf_mask[i].sum() *
                    5) if int(conf_mask[i].sum() * 5) < 4000 else 4000
                self.num = negative_sample_num
                negative_index = mbox_conf[i, :,
                                           0].argsort()[:negative_sample_num]
                self.ind = negative_index
                self.conf_mask = conf_mask[i]
                conf_mask[i, negative_index] = 1
                dummy_label[i, conf_mask[i, :, 0] == 0, 0] = 100
            t_conf_mask = chainer.Variable(cuda.cupy.array(conf_mask),
                                           volatile=x.volatile)
            t_loc_mask = chainer.Variable(cuda.cupy.array(loc_mask),
                                          volatile=x.volatile)
            dummy_label = cuda.cupy.array(dummy_label)
            self.t_conf_mask = t_conf_mask
            train_conf = self.mbox_conf * t_conf_mask
            train_conf.data += dummy_label
            self.train_conf = F.reshape(train_conf, (-1, 21))
            #train_conf = F.reshape(self.mbox_conf * t_conf_mask, (-1, 21))
            #print(type(dammy_label), type(self.train_conf.data))

            self.val_conf = F.flatten(conf)
            self.loss = F.softmax_cross_entropy(self.train_conf, self.val_conf)
            self.loss += F.mean_squared_error(self.mbox_loc * t_loc_mask, loc)
            self.accuracy = F.accuracy(self.train_conf, self.val_conf)
            return self.loss
        else:
            return self.mbox_loc, self.mbox_conf_softmax_reshape
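The per-source head outputs above all follow the same transpose-then-reshape pattern before being concatenated along the box axis; a shape-only sketch with made-up layer sizes:

import numpy as np
import chainer.functions as F

# Made-up sizes: a localization head over a 38x38 map with 3 default boxes per cell.
loc = np.zeros((2, 3 * 4, 38, 38), dtype=np.float32)  # (batch, boxes * 4, H, W)
loc = F.transpose(loc, (0, 2, 3, 1))                   # (batch, H, W, boxes * 4)
loc = F.reshape(loc, (2, -1, 4))                       # (batch, H * W * boxes, 4)
print(loc.shape)                                       # (2, 4332, 4)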
Example #49
    def __call__(self, x):

        store_activations = {}

        # down convolution
        for i in range(1, self.n_layers + 1):

            if i == 1:
                h = F.identity(x)
            else:
                h = F.max_pooling_nd(h, 2, stride=2)

            h = self['down_unet_block_%d' % (i)](h)
            h = self.down_conv_dropout(h)
            store_activations['down_unet_block_%d' % (i)] = h

        del h  # clear hidden layer

        # up convolution
        for i in range(self.n_layers - 1, 0, -1):

            if i == self.n_layers - 1:
                h = store_activations['down_unet_block_%d' % (i + 1)]
                del store_activations['down_unet_block_%d' % (i + 1)]  # clear
            else:
                h = h

            h = self['deconv_%d' % i](h)
            if self.batch_norm:
                h = self['bn_deconv_%d' % i](h)
            h = self.up_conv_activate_function(h)
            down_conv = store_activations['down_unet_block_%d' % (i)]
            del store_activations['down_unet_block_%d' % (i)]  # clear

            if self.n_dims == 2:
                h = F.concat([
                    h[:, :, 0:down_conv.shape[2], 0:down_conv.shape[3]],
                    down_conv
                ])  # fuse layer
            elif self.n_dims == 3:
                h = F.concat([
                    h[:, :, 0:down_conv.shape[2], 0:down_conv.shape[3],
                      0:down_conv.shape[4]], down_conv
                ])  # fuse layer
            del down_conv

            h = self['up_unet_block_%d' % i](h)
            h = self.up_conv_dropout(h)

            if i == 1:
                o = self['up_conv%d_3' % i](h)
                if self.n_dims == 2:
                    score = o[:, :, 0:x.shape[2], 0:x.shape[3]]
                elif self.n_dims == 3:
                    score = o[:, :, 0:x.shape[2], 0:x.shape[3], 0:x.shape[4]]

                self.score = score

        del h, o  # clear hidden layer

        return self.score
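A shape-only sketch of the crop-and-concatenate fusion used in the 2-D branch above (the sizes are invented): the upsampled map is cropped to the skip connection's spatial size and then fused along the channel axis.

import numpy as np
import chainer.functions as F

h = np.zeros((1, 64, 68, 68), dtype=np.float32)          # upsampled decoder features
down_conv = np.zeros((1, 64, 64, 64), dtype=np.float32)  # skip connection from the encoder

fused = F.concat([h[:, :, 0:down_conv.shape[2], 0:down_conv.shape[3]], down_conv])
print(fused.shape)  # (1, 128, 64, 64): channels stack, spatial size follows the skip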
Example #50
    def test_invalid_axis_type(self):
        inputs = [numpy.random.rand(3, 4), numpy.random.rand(3, 1)]

        with self.assertRaises(TypeError):
            functions.concat(inputs, 'a')
Example #51
 def output_from_seq_batch(self, y_seq_batch):
     y = F.concat(y_seq_batch, axis=0)
     y = F.dropout(y, ratio=self.dropout)
     return self.output(y)
Example #52
 def _forward(self, *inputs):
     return functions.concat(inputs, self.axis)
Example #53
def sequence_embed(embed, xs):
    x_len = [len(x) for x in xs]
    x_section = np.cumsum(x_len[:-1])
    ex = embed(F.concat(xs, axis=0))
    exs = F.split_axis(ex, x_section, 0, force_tuple=True)
    return exs
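A small usage sketch of the sequence_embed helper above (the embedding layer and vocabulary size are made up); it embeds a ragged batch with one concatenated lookup and splits the result back per sequence:

import numpy as np
import chainer.links as L

embed = L.EmbedID(10, 4)  # hypothetical vocabulary of 10 tokens, embedding size 4
xs = [np.array([1, 2, 3], dtype=np.int32), np.array([4, 5], dtype=np.int32)]
exs = sequence_embed(embed, xs)
print([e.shape for e in exs])  # [(3, 4), (2, 4)]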
Example #54
    def __call__(self, x):
        heatmaps = []

        h = F.relu(self.conv1_1(x))
        h = F.relu(self.conv1_2(h))
        h = F.max_pooling_2d(h, ksize=2, stride=2)
        h = F.relu(self.conv2_1(h))
        h = F.relu(self.conv2_2(h))
        h = F.max_pooling_2d(h, ksize=2, stride=2)
        h = F.relu(self.conv3_1(h))
        h = F.relu(self.conv3_2(h))
        h = F.relu(self.conv3_3(h))
        h = F.relu(self.conv3_4(h))
        h = F.max_pooling_2d(h, ksize=2, stride=2)
        h = F.relu(self.conv4_1(h))
        h = F.relu(self.conv4_2(h))
        h = F.relu(self.conv4_3(h))
        h = F.relu(self.conv4_4(h))
        h = F.relu(self.conv5_1(h))
        h = F.relu(self.conv5_2(h))
        h = F.relu(self.conv5_3_CPM(h))
        feature_map = h

        # stage1
        h = F.relu(self.conv6_1_CPM(h))
        h = self.conv6_2_CPM(h)
        heatmaps.append(h)

        # stage2
        h = F.concat((h, feature_map), axis=1) # channel concat
        h = F.relu(self.Mconv1_stage2(h))
        h = F.relu(self.Mconv2_stage2(h))
        h = F.relu(self.Mconv3_stage2(h))
        h = F.relu(self.Mconv4_stage2(h))
        h = F.relu(self.Mconv5_stage2(h))
        h = F.relu(self.Mconv6_stage2(h))
        h = self.Mconv7_stage2(h)
        heatmaps.append(h)

        # stage3
        h = F.concat((h, feature_map), axis=1) # channel concat
        h = F.relu(self.Mconv1_stage3(h))
        h = F.relu(self.Mconv2_stage3(h))
        h = F.relu(self.Mconv3_stage3(h))
        h = F.relu(self.Mconv4_stage3(h))
        h = F.relu(self.Mconv5_stage3(h))
        h = F.relu(self.Mconv6_stage3(h))
        h = self.Mconv7_stage3(h)
        heatmaps.append(h)

        # stage4
        h = F.concat((h, feature_map), axis=1) # channel concat
        h = F.relu(self.Mconv1_stage4(h))
        h = F.relu(self.Mconv2_stage4(h))
        h = F.relu(self.Mconv3_stage4(h))
        h = F.relu(self.Mconv4_stage4(h))
        h = F.relu(self.Mconv5_stage4(h))
        h = F.relu(self.Mconv6_stage4(h))
        h = self.Mconv7_stage4(h)
        heatmaps.append(h)

        # stage5
        h = F.concat((h, feature_map), axis=1) # channel concat
        h = F.relu(self.Mconv1_stage5(h))
        h = F.relu(self.Mconv2_stage5(h))
        h = F.relu(self.Mconv3_stage5(h))
        h = F.relu(self.Mconv4_stage5(h))
        h = F.relu(self.Mconv5_stage5(h))
        h = F.relu(self.Mconv6_stage5(h))
        h = self.Mconv7_stage5(h)
        heatmaps.append(h)

        # stage6
        h = F.concat((h, feature_map), axis=1) # channel concat
        h = F.relu(self.Mconv1_stage6(h))
        h = F.relu(self.Mconv2_stage6(h))
        h = F.relu(self.Mconv3_stage6(h))
        h = F.relu(self.Mconv4_stage6(h))
        h = F.relu(self.Mconv5_stage6(h))
        h = F.relu(self.Mconv6_stage6(h))
        h = self.Mconv7_stage6(h)
        heatmaps.append(h)

        return heatmaps
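Each refinement stage above consumes the previous stage's heatmaps concatenated with the shared feature map along the channel axis; a shape-only sketch with invented channel counts:

import numpy as np
import chainer.functions as F

h = np.zeros((1, 22, 46, 46), dtype=np.float32)             # previous-stage heatmaps (invented size)
feature_map = np.zeros((1, 128, 46, 46), dtype=np.float32)  # shared backbone features (invented size)
stage_in = F.concat((h, feature_map), axis=1)
print(stage_in.shape)  # (1, 150, 46, 46)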
Example #55
 def __call__(self, x):
     h = F.concat((x[:, 0, :, :, :], x[:, 1, :, :, :]), axis=1)
     h = F.relu(self.conv1(h))
     h = F.relu(self.conv2(h))
     h = F.relu(self.conv3(h))
     return h
Example #56
def run_disco_update():

    # read data
    DataN = get_data_N_rand(DataA,
                            N_pic=Gv.BatchSize,
                            imgH=64,
                            imgW=64,
                            keys=['x'])
    x_a = Variable(xp.asarray(DataN['x']))

    DataN = get_data_N_rand(DataB,
                            N_pic=Gv.BatchSize,
                            imgH=64,
                            imgW=64,
                            keys=['x'])
    x_b = Variable(xp.asarray(DataN['x']))

    # conversion
    x_ax_1 = Md['g_ab_c1'](x_a)
    x_ax_2 = Md['g_ab_c2'](x_a)
    x_ab_1 = Md['g_ab_d'](x_ax_1)
    x_ab_2 = Md['g_ab_d'](x_ax_2)

    x_ab = F.concat((x_ab_1, x_ab_2), axis=1)  #3ch,3ch⇒6ch

    x_b_1, x_b_2 = F.split_axis(x_b, 2, axis=1)  #6ch⇒3ch,3ch
    x_ba_1 = Md['g_ba_d1'](Md['g_ba_c'](x_b_1))
    x_ba_2 = Md['g_ba_d2'](Md['g_ba_c'](x_b_2))

    x_ba = x_ba_1 + x_ba_2

    # reconversion
    x_aba_1 = Md['g_ba_d1'](Md['g_ba_c'](x_ab_1))
    x_aba_2 = Md['g_ba_d2'](Md['g_ba_c'](x_ab_2))
    x_aba = x_aba_1 + x_aba_2

    x_bab_1 = Md['g_ab_d'](Md['g_ab_c1'](x_ba))
    x_bab_2 = Md['g_ab_d'](Md['g_ab_c2'](x_ba))
    x_bab = F.concat((x_bab_1, x_bab_2), axis=1)  #3ch,3ch⇒6ch

    # reconstruction loss
    recon_loss_a = F.mean_squared_error(x_a, x_aba)
    recon_loss_b = F.mean_squared_error(x_b, x_bab)

    # discriminate
    y_a_real, feats_a_real = Md['d_a'](x_a)
    y_a_fake, feats_a_fake = Md['d_a'](x_ba)

    y_b_real, feats_b_real = Md['d_b'](x_b)
    y_b_fake, feats_b_fake = Md['d_b'](x_ab)

    # GAN loss
    gan_loss_dis_a, gan_loss_gen_a = compute_loss_gan(y_a_real, y_a_fake)
    feat_loss_a = compute_loss_feat(feats_a_real, feats_a_fake)

    gan_loss_dis_b, gan_loss_gen_b = compute_loss_gan(y_b_real, y_b_fake)
    feat_loss_b = compute_loss_feat(feats_b_real, feats_b_fake)

    # compute loss
    total_loss_gen_a = (1. - Gv.Recon_rate) * (
        0.1 * gan_loss_gen_b +
        0.9 * feat_loss_b) + Gv.Recon_rate * recon_loss_a
    total_loss_gen_b = (1. - Gv.Recon_rate) * (
        0.1 * gan_loss_gen_a +
        0.9 * feat_loss_a) + Gv.Recon_rate * recon_loss_b

    gen_loss = total_loss_gen_a + total_loss_gen_b
    dis_loss = gan_loss_dis_a + gan_loss_dis_b

    if Iteration % 3 == 0:
        Md['d_a'].cleargrads()
        Md['d_b'].cleargrads()
        dis_loss.backward()
        Op['d_a'].update()
        Op['d_b'].update()

    else:
        Md['g_ab_c1'].cleargrads()
        Md['g_ab_c2'].cleargrads()
        Md['g_ab_d'].cleargrads()
        Md['g_ba_c'].cleargrads()
        Md['g_ba_d1'].cleargrads()
        Md['g_ba_d2'].cleargrads()

        gen_loss.backward()
        Op['g_ab_c1'].update()
        Op['g_ab_c2'].update()
        Op['g_ab_d'].update()
        Op['g_ba_c'].update()
        Op['g_ba_d1'].update()
        Op['g_ba_d2'].update()
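The A-to-B direction above packs two 3-channel outputs into one 6-channel tensor, and the B side undoes that with split_axis; a minimal sketch of the packing (shapes invented):

import numpy as np
import chainer.functions as F

x_ab_1 = np.zeros((4, 3, 64, 64), dtype=np.float32)
x_ab_2 = np.ones((4, 3, 64, 64), dtype=np.float32)

x_ab = F.concat((x_ab_1, x_ab_2), axis=1)     # 3ch + 3ch -> 6ch
x_b_1, x_b_2 = F.split_axis(x_ab, 2, axis=1)  # 6ch -> 3ch, 3ch
print(x_ab.shape, x_b_1.shape, x_b_2.shape)   # (4, 6, 64, 64) (4, 3, 64, 64) (4, 3, 64, 64)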
Example #57
 def encode(self, x, y):
     h1 = F.tanh(self.le1(x))
     h2 = F.tanh(self.embed_e(y))
     mu = self.le2_mu(F.concat([h1, h2]))
     ln_var = self.le2_ln_var(F.concat([h1, h2]))  # log(sigma**2)
     return mu, ln_var
Example #58
def find_closest_latent_state(real_o, generator, transition, classifier, args):
    trials = 400
    target = OptimizableLatentState(s_shape=(trials, 7), z_shape=(trials, 4))
    if not args.gpu < 0:
        target.to_gpu()

    _, channels, height, width = real_o.shape
    real_o = real_o.reshape((channels, height, width))
    real_o = F.broadcast_to(real_o, (trials, ) + real_o.shape)
    print('real_o shape: ', real_o.shape)

    optimizer = optimizers.Adam(alpha=1e-2)
    optimizer.setup(target)

    iterations = 1000

    def compute_loss(real_o, o_current):
        concat_image = F.concat((real_o, o_current), axis=1)
        classification_loss = classifier(concat_image)
        classification_loss = F.squeeze(classification_loss)
        l2_loss = F.batch_l2_norm_squared(real_o - o_current)
        assert classification_loss.shape == l2_loss.shape
        loss = l2_loss - classification_loss
        return loss

    s_current, z = target()
    for i in range(iterations):
        optimizer.target.cleargrads()

        s_next, _ = transition(s_current)
        # print('s_current shape: ', s_current.shape, 's_next shape: ', s_next.shape)
        x = F.concat((z, s_current, s_next), axis=1)
        x = F.reshape(x, shape=x.shape + (1, 1))
        o = generator(x)
        o_current, _ = F.split_axis(o, 2, axis=1, force_tuple=True)
        # print('o shape: ', o_current.shape)
        # print('real_o shape: ', real_o.shape)

        loss = compute_loss(real_o, o_current)
        mean_loss = F.mean(loss)
        mean_loss.backward()
        optimizer.update()
        mean_loss.unchain_backward()

        if i % 100 == 0:
            index = F.argmin(loss).data
            print('loss at: ', i, ' min index: ', index, ' min loss: ',
                  loss[index])

    # Select s and z with min loss
    s_current, z = target()
    s_next, _ = transition(s_current)
    x = F.concat((z, s_current, s_next), axis=1)
    x = F.reshape(x, shape=x.shape + (1, 1))
    o = generator(x)
    o_current, _ = F.split_axis(o, 2, axis=1, force_tuple=True)
    loss = compute_loss(real_o, o_current)

    index = F.argmin(loss).data
    print('min index: ', index, ' min loss: ', loss[index])

    s_min = s_current.data[index]
    print('s min: ', s_min)
    z_min = z.data[index]
    print('z min: ', z_min)
    return chainer.Variable(s_min), chainer.Variable(z_min)
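The latent vectors above are concatenated along the feature axis and then given trailing singleton spatial dimensions so a convolutional generator can consume them; a shape-only sketch (the 4- and 7-dimensional sizes mirror the trials setup above):

import numpy as np
import chainer.functions as F

z = np.zeros((3, 4), dtype=np.float32)
s_current = np.zeros((3, 7), dtype=np.float32)
s_next = np.zeros((3, 7), dtype=np.float32)

x = F.concat((z, s_current, s_next), axis=1)  # (3, 18)
x = F.reshape(x, shape=x.shape + (1, 1))      # (3, 18, 1, 1): NCHW with 1x1 spatial extent
print(x.shape)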
Example #59
    def __call__(self, x, rel_y, neighbor_entities, neighbor_dict, assign,
                 entities, relations, RC, EC):
        if self.layer == 0:
            return self.easy_case(x, neighbor_entities, neighbor_dict, assign,
                                  entities, relations)

        if len(neighbor_dict) == 1:
            x = [x]
        else:
            x = F.split_axis(x, len(neighbor_dict), axis=0)

        assignR = dict()
        bundle = defaultdict(list)
        for v, k in enumerate(neighbor_entities):
            for i in assign[v]:
                e = entities[i]
                if (e, k) in relations: r = relations[(e, k)] * 2
                else: r = relations[(k, e)] * 2 + 1
                assignR[(r, len(bundle[r]))] = v
                bundle[r].append(x[v])

        result = [0 for i in range(len(neighbor_dict))]
        rel_y = F.split_axis(rel_y, len(RC), axis=0)
        for r in bundle:
            rx = bundle[r]
            r_rep = rel_y[r // 2]
            if len(rx) == 1:
                rx = rx[0]
                tmp = F.pad(F.concat((rx, r_rep), axis=0), ((0, 0), (0, 1)),
                            'constant')
                tmp = F.reshape(tmp, (1, 1, 2, -1))
                if r % 2 == 0: rx = getattr(self, self.forwardH[0][0])(tmp)
                else: rx = getattr(self, self.forwardT[0][0])(tmp)
                rx = F.reshape(rx, (1, -1))
                result[assignR[(r, 0)]] = rx
            else:

                size = len(rx)
                #tempRx = list()

                for i in range(size):
                    #print ('shape is:', rx[i].shape)
                    rx[i] = F.pad(F.concat((rx[i], r_rep), axis=0),
                                  ((0, 0), (0, 1)), 'constant')
                    #print ('shape is:', rx[i].shape)
                rx = F.concat(rx, axis=0)
                #print ('shape is:', rx.shape)
                tmp = F.reshape(rx, (size, 1, 2, -1))
                #print ('shape is:', tmp.shape)
                if r % 2 == 0: rx = getattr(self, self.forwardH[0][0])(tmp)
                else: rx = getattr(self, self.forwardT[0][0])(tmp)
                #print ('shape is:', rx.shape)
                #print rx.data
                rx = F.reshape(rx, (size, -1))
                #print ('shape is:', rx.shape, size)
                rx = F.split_axis(rx, size, axis=0)
                for i, x in enumerate(rx):
                    result[assignR[(r, i)]] = x

        #if self.pooling_method=='val':
        #	result = self.valpooling(result,assign,RC,EC)
        if self.pooling_method == 'weight':
            sources = defaultdict(list)
            for ee in assign:
                for i in assign[ee]:
                    sources[i].append((ee, result[ee]))
            resultT = []
            for i, xxs in sorted(sources.items(), key=lambda x: x[0]):
                ii = entities[i]
                if len(xxs) == 1:
                    resultT.append(xxs[0][1])
                else:
                    tmplist = []
                    for txxs in xxs:
                        k = txxs[0]
                        ke = txxs[1]
                        '''
						if (ii,k) in relations:
							r = relations[(ii,k)]
						else if (k,ii) in relations:
							r = relations[(k,ii)]'''
                        tmplist.append(EC[k])
                    tmplist2 = []
                    if (sum(tmplist) != 0.0):
                        for t in range(len(xxs)):
                            tmplist2.append(xxs[t][1] * tmplist[t])
                        resultT.append(sum(tmplist2) / sum(tmplist))
                    else:
                        for t in range(len(xxs)):
                            tmplist2.append(xxs[t][1])
                        resultT.append(sum(tmplist2) / len(xxs))
            result = resultT

        if self.pooling_method == 'max':
            result = self.maxpooling(result, assign)
        if self.pooling_method == 'avg':
            result = self.averagepooling(result, assign)
        if self.pooling_method == 'sum':
            result = self.sumpooling(result, assign)
        result = F.concat(result, axis=0)

        #result = F.concat(resultT,axis=0)
        return result
Example #60
    def __call__(self, x, z=None, mask=None):
        # pass in x and z shape = (batch, D, T), T is sequence length
        xp = self.xp
        h = self.h
        if self.is_self_attention:
            # In self-attention Q, K and V all come from x: self.W_QKV expands the
            # feature dimension to 3 * n_units, giving (batchsize, n_units * 3,
            # sentence_length), which is then split into three parts along axis=1.
            # node_feature shape = (F, T, D); z shape = (F', T, D) holds neighbor
            # features, where F' may differ from F (e.g. the number of edges per
            # frame when z comes from an EdgeRNN output).
            Q, K, V = F.split_axis(self.W_QKV(x), 3, axis=1)
        else:
            # The query comes from x; the keys and values we attend over come from z.
            Q = self.W_Q(x)
            K, V = F.split_axis(self.W_KV(z), 2, axis=1)

        batch, n_units, n_querys = Q.shape  # n_querys is the number of queries, also written "T_q"
        _, _, n_keys = K.shape  # n_keys is the key sequence length, also written "T_k"

        # Calculate Attention Scores with Mask for Zero-padded Areas
        # Perform Multi-head Attention using pseudo batching: h * batch_size
        # all together at once for efficiency
        # batch, n_units, n_querys : split by axis=1 to cut n_heads slice,
        # each slice shape is (batch, n_units//8, n_querys)
        # then concat in axis=0
        batch_Q = F.concat(F.split_axis(Q, h, axis=1), axis=0)
        batch_K = F.concat(F.split_axis(K, h, axis=1), axis=0)
        batch_V = F.concat(F.split_axis(V, h, axis=1), axis=0)
        assert (batch_Q.shape == (batch * h, n_units // h, n_querys))
        assert (batch_K.shape == (batch * h, n_units // h, n_keys))
        assert (batch_V.shape == (batch * h, n_units // h, n_keys))

        # Notice that this formula is different from paper Eqn 1., this time is transpose of Q matrix,
        # This F.matmul actually perform batch_matmul
        batch_A = F.matmul(batch_Q, batch_K, transa=True) \
            * self.scale_score  # shape = batch * h, T_q , T_k,matrix mat along the dimension of n_units//8
        # Where mask is False the score is set to -inf so softmax gives it zero weight;
        # the mask is tiled h times to match the pseudo-batched heads.
        if mask is not None:
            mask = xp.concatenate([mask] * h, axis=0)
            batch_A = F.where(mask, batch_A,
                              xp.full(batch_A.shape, -np.inf, 'f'))
        batch_A = F.softmax(
            batch_A, axis=2
        )  # axis=2 means attend along n_keys axis. Thus you can softly choose keys
        batch_A = F.where(xp.isnan(batch_A.data), xp.zeros(batch_A.shape, 'f'),
                          batch_A)  # push nan value to zero
        assert (batch_A.shape == (batch * h, n_querys, n_keys))

        # Calculate Weighted Sum before broad_cast (batch_A stores weights)
        # batch_A shape = (batch * h, 1, n_querys, n_keys); batch_V shape = (batch * h, n_units//8, 1, n_keys)
        batch_A, batch_V = F.broadcast(batch_A[:, None],
                                       batch_V[:, :, None])  # n_units // 8 is d_v
        # batch_C shape = batch * h, n_units//8, n_querys, n_keys, axis=3 means weighted sum along sequence
        batch_C = F.sum(batch_A * batch_V,
                        axis=3)  # shape = batch * h, n_units//8, n_querys
        assert (batch_C.shape == (batch * h, n_units // h, n_querys))
        # slice in h piece,then concat along axis=1, shape = (batch, n_units//8 * 8, n_querys), head = 8
        C = F.concat(F.split_axis(batch_C, h, axis=0), axis=1)
        # Notice that there is no n_keys in shape any more, because weighed sum already eliminated this dimension.
        assert (C.shape == (batch, n_units, n_querys))
        C = self.finishing_linear_layer(C)
        return C
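The head pseudo-batching used above boils down to splitting the unit axis into h heads and stacking them along the batch axis, then reversing the operation after attention; a standalone sketch with invented sizes:

import numpy as np
import chainer.functions as F

batch, n_units, n_querys, h = 2, 8, 5, 4
Q = np.random.rand(batch, n_units, n_querys).astype(np.float32)

batch_Q = F.concat(F.split_axis(Q, h, axis=1), axis=0)
print(batch_Q.shape)  # (batch * h, n_units // h, n_querys) == (8, 2, 5)

Q_back = F.concat(F.split_axis(batch_Q, h, axis=0), axis=1)
print(np.allclose(Q, Q_back.data))  # True: the two operations are inverses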