Example #1
 def __call__(self, x):
     self.model.train = False
     Wh = chainer.Variable(self.Wh_data)
     Ww = chainer.Variable(self.Ww_data)
     tp = chainer.Variable(self.truth_probabiliry)
     t = chainer.Variable(self.t_dummy)
     model_loss = self.model(self.img(), t)
     #p = self.getprobability(model_loss)
     a = self.getbeforesoftmax(model_loss)
     #print(a.data, F.get_item(a, (0, self.label)).data)
     #print(F.sum(p**2).data)
     #p = (1 / F.sum(p**2)) .* (p**2)
     #ce = self.getCrossEntropy(model_loss)
     #print(t.data, ce.data, p.data, tp.data)
     #class_mse = ce#F.mean_squared_error(p, tp)
     #class_mse = F.sum(-F.log(p**2 / F.sum(p**2).data) * tp)
     #activation = -F.sum(a * tp)
     activation = -F.get_item(a, (0, self.label))
     #activation = -F.get_item(a, (0, self.label)) / F.sqrt(F.sum(a**2) - F.get_item(a, (0, self.label))**2)
     #activation = -F.get_item(a, (0, self.label)) + (F.sum(a**2) - F.get_item(a, (0, self.label))**2) / (a.data.shape[1] - 1)
     #class_mse = F.sum(-F.log(p) * tp)
     tv = self.tv_norm(self.img(), Wh, Ww) / np.prod(self.img().data.shape[1:])
     lp = (F.sum(self.img()**self.p) ** (1./self.p)) / np.prod(self.img().data.shape[1:])
     loss = self.lambda_a * activation + self.lambda_tv * tv + self.lambda_lp * lp
     chainer.report({'inv_loss': loss, 'activation': activation, 'tv': tv, 'lp': lp}, self)
     #print('inverter', x.data, class_mse.data, tv.data)
     return loss
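
The key get_item pattern in Example 1 is pulling one pre-softmax activation out of the network output so that the selected scalar stays in the computation graph and can be backpropagated to the input image. A minimal sketch of just that step, with a toy Variable standing in for the real model output (the names a and label below are illustrative, not from the snippet above):

import numpy as np
import chainer
import chainer.functions as F

# Toy pre-softmax scores for one image over 12 classes (illustrative values).
a = chainer.Variable(np.arange(12, dtype=np.float32).reshape(1, 12))
label = 3
activation = -F.get_item(a, (0, label))  # scalar Variable equal to -a[0, label]
activation.backward()                    # gradient flows back through the indexing
print(float(activation.data))            # -3.0
print(a.grad[0, label])                  # -1.0; every other entry of a.grad is 0.0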
Example #2
    def train(self, n_step=10000, policy=None):
        policy = self.get_policy(policy)
        state      = np.empty(shape=(n_step+1,) + self.shape, dtype=np.float32)
        action     = np.empty(shape=(n_step+1,),        dtype=np.int32)
        value_max  = np.empty(shape=(n_step+1,),        dtype=np.float32)
        value_act  = [None for i in range(n_step+1)]  # list of chainer.Variable
        reward     = np.empty(shape=(n_step+1,),        dtype=np.float32)
        for t in range(n_step+1):
            reward[t]     = self.game.get_reward()
            state[t]      = self.game.get_state()
            value_all     = F.reshape(self.q_a_given_s(state[t]), (-1,))
            action[t]     = policy(value_all.data)
            value_act[t]  = F.get_item(value_all, slice(action[t], action[t]+1))
            value_max[t]  = np.max(value_act[t].data)
            self.game.transit(action[t])

        value_predicted = F.get_item(F.concat(value_act, axis=0), (slice(0, n_step), ))
        value_actual    = (reward[1:] + self.discount_factor * value_max[1:])
        self.opt.update(F.mean_squared_error, value_predicted, value_actual)
        return reward.sum()
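
Example 2 keeps the Q-value of the chosen action differentiable by indexing with a length-1 slice instead of a bare integer. A small sketch of that slicing trick on toy values (q_values and action are made up for illustration):

import numpy as np
import chainer.functions as F
from chainer import Variable

q_values = Variable(np.array([0.1, 0.7, 0.2], dtype=np.float32))  # Q-values for 3 actions
action = 1
q_chosen = F.get_item(q_values, slice(action, action + 1))  # shape (1,), still in the graph
print(q_chosen.data)                 # [0.7]
print(float(np.max(q_chosen.data)))  # 0.7, the value_max counterpart above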
Example #3
 def check_forward(self, x_data):
     slices = []
     for i, s in enumerate(self.slices):
         if isinstance(s, numpy.ndarray):
             s = chainer.backends.cuda.cupy.array(s)
         if isinstance(s, list):
             s = chainer.backends.cuda.cupy.array(s, dtype=numpy.int32)
         slices.append(s)
     slices = tuple(slices)
     x = chainer.Variable(x_data)
     y = functions.get_item(x, slices)
     self.assertEqual(y.data.dtype, numpy.float32)
     numpy.testing.assert_equal(cuda.to_cpu(x_data)[self.slices],
                                cuda.to_cpu(y.data))
Example #4
 def check_forward(self, x_data):
     x = chainer.Variable(x_data)
     y = functions.get_item(x, self.slices)
     self.assertEqual(y.data.dtype, numpy.float32)
     numpy.testing.assert_equal(cuda.to_cpu(x_data)[self.slices],
                                cuda.to_cpu(y.data))
Example #5
    def forward(self, h_s, c_s, n_speakers=None, to_train=1):
        # h_s: (1, B, F) h_0
        # c_s: (1, B, F) c_0
        # n_speakers: (B,) number of speakers (for test set None)
        # to_train: 1 to grab S+1 speakers while training; 0 to grab S speakers if given for inference 
        batch_size = h_s.shape[1]
        
        if n_speakers:

            # zeros: (B, 1, F)
            zeros = [cp.zeros((1, self.in_size)).astype(cp.float32) for i in range(batch_size)]
            #import sys
            #print(n_speakers)
            #sys.exit()
            max_speakers = max(n_speakers).tolist()
            # max_speakers = 2
            A = cp.array([])
            
            for i in range(max_speakers + to_train):
                h_s, c_s, _ = self.lstm(h_s, c_s, zeros)
                a_s = h_s[0]
                A = F.vstack((A, a_s)) if A.size else a_s
            #P = F.sigmoid(self.linear(A))    we will use sigmoid_cross_entropy
            P = self.linear(A)

            # dimension manipulation to get
            # A: (B, n_speakers, F)
            # P: (B, n_speakers, 1)
            A = F.swapaxes(A.reshape(max_speakers + to_train, batch_size, -1), 0, 1)
            P = F.swapaxes(P.reshape(max_speakers + to_train, batch_size, -1), 0, 1)

            # strip
            A = [F.get_item(a, slice(0, n_spk)) for a, n_spk in zip(A, n_speakers)]
            P = [F.get_item(p, slice(0, n_spk + to_train)) for p, n_spk in zip(P, n_speakers)]

        else:
            # don't know number of speakers so generate a_s and p_s until p_s < 0.5
            # cannot do this batch wise like above
            # process it for each group in the batch

            # zeros: (1, 1, F)
            zeros = [cp.zeros((1, self.in_size)).astype(cp.float32)]

            A = []
            for batch in range(batch_size):
                h_b, c_b = h_s[:, batch: batch + 1, :], c_s[:, batch: batch + 1, :]

                a = p = cp.array([])
                while True:
                    h_b, c_b, _ = self.lstm(h_b, c_b, zeros)
                    a_s = h_b[0]
                    p_s = F.sigmoid(self.linear(a_s))
                    if p_s.array[0] < 0.5: 
                        break
                    a = F.vstack((a, a_s)) if a.size else a_s
                    # p = F.vstack((p, p_s)) if p.size else p_s
                a = a if a.size else cp.zeros((1, h_s.shape[2])).astype(cp.float32)
                # p = p if p.size else Variable(np.array([[0]]).astype(np.float32))
                A.append(a)
                # P.append(p)

        P = P if (n_speakers is not None and to_train) else None
        return A, P
Example #6
 def test_multiple_ellipsis(self):
     with self.assertRaises(ValueError):
         functions.get_item(self.x_data, (Ellipsis, Ellipsis))
Example #7
 def check_forward(self, x_data):
     x = chainer.Variable(x_data)
     y = functions.get_item(x, self.slices)
     self.assertEqual(y.data.dtype, numpy.float32)
     numpy.testing.assert_equal(cuda.to_cpu(x_data)[self.slices],
                                cuda.to_cpu(y.data))
Example #8
 def f(x):
     return functions.get_item(x, slices)
Example #9
 def forward(self, inputs, device):
     x, = inputs
     slices = self._convert_slices(self.slices, device)
     y = functions.get_item(x, slices)
     return y,
Example #10
def _subsamplex(x, n):
    x = [F.get_item(xx, (slice(None, None, n), slice(None))) for xx in x]
    ilens = [xx.shape[0] for xx in x]
    return x, ilens
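
Example 10 keeps every n-th frame of each (T, D) feature matrix with a stepped slice; F.get_item accepts the same basic slicing as NumPy while preserving the graph. A quick sketch on a dummy matrix:

import numpy as np
import chainer.functions as F
from chainer import Variable

x = Variable(np.arange(20, dtype=np.float32).reshape(10, 2))  # (T, D) = (10, 2) features
n = 3
sub = F.get_item(x, (slice(None, None, n), slice(None)))      # keep frames 0, 3, 6, 9
print(sub.shape)  # (4, 2)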
Example #11
    def CalcLoss(self, xs, ys):
        wxs = [
            np.array([source_word_ids.get(w, UNK) for w in x], dtype=np.int32)
            for x in xs
        ]
        cxs = [
            np.array([source_char_ids.get(c, UNK) for c in list("".join(x))],
                     dtype=np.int32) for x in xs
        ]
        concat_wxs = np.concatenate(wxs)
        concat_cxs = np.concatenate(cxs)

        # Target token can be either a word or a char
        wcys = [
            np.array([target_ids.get(w, UNK) for w in y], dtype=np.int32)
            for y in ys
        ]

        eos = self.xp.array([EOS], 'i')
        ys_in = [F.concat([eos, y], axis=0) for y in wcys]
        ys_out = [F.concat([y, eos], axis=0) for y in wcys]

        # Both xs and ys_in are lists of arrays.
        wexs = sequence_embed(self.embed_xw, wxs)
        cexs = sequence_embed(self.embed_xc, cxs)

        wexs_f = wexs
        wexs_b = [F.get_item(wex, range(len(wex))[::-1]) for wex in wexs]
        cexs_f = cexs
        cexs_b = [F.get_item(cex, range(len(cex))[::-1]) for cex in cexs]

        eys = sequence_embed(self.embed_y, ys_in)

        batch = len(xs)
        # None represents a zero vector in an encoder.
        _, hfw = self.encoder_fw(None, wexs_f)
        h1, hbw = self.encoder_bw(None, wexs_b)
        _, hfc = self.encoder_fc(None, cexs_f)
        h2, hbc = self.encoder_bc(None, cexs_b)

        # collection of hidden state vectors
        hbw = [F.get_item(h, range(len(h))[::-1]) for h in hbw]
        hbc = [F.get_item(h, range(len(h))[::-1]) for h in hbc]
        htw = list(map(lambda x, y: F.concat([x, y], axis=1), hfw, hbw))
        htc = list(map(lambda x, y: F.concat([x, y], axis=1), hfc, hbc))
        ht = list(map(lambda x, y: F.concat([x, y], axis=0), htw, htc))
        h = F.concat([h1, h2], axis=2)
        h_list, h_bar_list, c_s_list, z_s_list = self.decoder(h, ht, eys)

        # It is faster to concatenate data before calculating loss
        # because only one matrix multiplication is called.
        os = h_list
        os_len = [len(s) for s in os]
        os_section = np.cumsum(os_len[:-1])
        concat_os = F.concat(os, axis=0)
        concat_os_out = self.W(concat_os)
        concat_ys_out = F.concat(ys_out, axis=0)

        n_words = concat_ys_out.shape[0]
        loss = F.sum(
            F.softmax_cross_entropy(concat_os_out, concat_ys_out,
                                    reduce='no')) / n_words
        return loss
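
Examples 11, 12 and 20 reverse each embedded sequence for the backward encoder by indexing with a reversed index list. Whether get_item accepts such integer-list (advanced) indices depends on the Chainer version: newer releases do, while the older tests in Examples 21 and 24 expect a ValueError. A sketch of the reversal, assuming a version with advanced-indexing support:

import numpy as np
import chainer.functions as F
from chainer import Variable

ex = Variable(np.arange(6, dtype=np.float32).reshape(3, 2))  # a length-3 embedded sequence
ex_b = F.get_item(ex, list(range(len(ex)))[::-1])            # rows in reverse order
print(ex_b.data)
# [[4. 5.]
#  [2. 3.]
#  [0. 1.]]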
Example #12
    def translate(self, xs, max_length=100):
        print("Now translating")
        batch = len(xs)
        print("batch", batch)
        with chainer.no_backprop_mode(), chainer.using_config('train', False):
            wxs = [
                np.array([source_word_ids.get(w, UNK) for w in x],
                         dtype=np.int32) for x in xs
            ]
            wx_len = [len(wx) for wx in wxs]
            wx_section = np.cumsum(wx_len[:-1])
            valid_wx_section = np.insert(wx_section, 0, 0)
            cxs = [
                np.array(
                    [source_char_ids.get(c, UNK) for c in list("".join(x))],
                    dtype=np.int32) for x in xs
            ]

            wexs = sequence_embed(self.embed_xw, wxs)
            cexs = sequence_embed(self.embed_xc, cxs)

            wexs_f = wexs
            wexs_b = [wex[::-1] for wex in wexs]
            cexs_f = cexs
            cexs_b = [cex[::-1] for cex in cexs]

            _, hfw = self.encoder_fw(None, wexs_f)
            h1, hbw = self.encoder_bw(None, wexs_b)
            _, hfc = self.encoder_fc(None, cexs_f)
            h2, hbc = self.encoder_bc(None, cexs_b)

            hbw = [F.get_item(h, range(len(h))[::-1]) for h in hbw]
            hbc = [F.get_item(h, range(len(h))[::-1]) for h in hbc]
            htw = list(map(lambda x, y: F.concat([x, y], axis=1), hfw, hbw))
            htc = list(map(lambda x, y: F.concat([x, y], axis=1), hfc, hbc))
            ht = list(map(lambda x, y: F.concat([x, y], axis=0), htw, htc))

            ys = self.xp.full(batch, EOS, 'i')
            result = []
            h = F.concat([h1, h2], axis=2)
            for i in range(max_length):
                eys = self.embed_y(ys)
                eys = chainer.functions.split_axis(eys, batch, 0)
                h_list, h_bar_list, c_s_list, z_s_list = self.decoder(
                    h, ht, eys)
                cys = chainer.functions.concat(h_list, axis=0)
                wy = self.W(cys)
                ys = self.xp.argmax(wy.data, axis=1).astype('i')
                result.append(ys)
                h = F.transpose_sequence(h_list)[-1]
                h = F.reshape(h, (self.n_layers, h.shape[0], h.shape[1]))

        result = cuda.to_cpu(self.xp.stack(result).T)

        # Remove EOS tags
        outs = []
        for y in result:
            inds = np.argwhere(y == EOS)
            if len(inds) > 0:
                y = y[:inds[0, 0]]
            outs.append(y)
        return outs
Example #13
    def calc_loss(self,
                  image_size,
                  predicted_grids,
                  gt_bbox_points,
                  objectness_scores,
                  normalize=True):
        predicted_bbox_points = self.get_corners(predicted_grids,
                                                 image_size,
                                                 scale_to_image_size=False)

        # 1. transform box coordinates to aabb coordinates for determination of iou
        predicted_bbox_points = (predicted_bbox_points[0], predicted_bbox_points[4],
                                 predicted_bbox_points[3], predicted_bbox_points[7])
        predicted_bbox_points = F.stack(predicted_bbox_points, axis=1)

        # 2. find best prediction area for each gt bbox
        gt_bboxes_to_use_for_loss = []
        positive_anchor_indices = self.xp.empty((0, ), dtype=self.xp.int32)
        not_contributing_anchors = self.xp.empty((0, ), dtype=self.xp.int32)
        for index, gt_bbox in enumerate(gt_bbox_points):
            # determine which bboxes are positive boxes as they have high iou with gt and also which bboxes are negative
            # this is also used to train objectness classification
            gt_bbox = self.xp.tile(gt_bbox[None, ...],
                                   (len(predicted_bbox_points), 1))

            ious = bbox_iou(gt_bbox, predicted_bbox_points.data)
            positive_boxes = self.xp.where((ious[0] >= 0.7))
            not_contributing_boxes = self.xp.where(
                self.xp.logical_and(0.3 < ious[0], ious[0] < 0.7))
            if len(positive_boxes[0]) == 0:
                best_iou_index = ious[0, :].argmax()
                positive_anchor_indices = self.xp.concatenate(
                    (positive_anchor_indices, best_iou_index[None, ...]),
                    axis=0)
                gt_bboxes_to_use_for_loss.append(gt_bbox[0])
            else:
                positive_anchor_indices = self.xp.concatenate(
                    (positive_anchor_indices, positive_boxes[0]), axis=0)
                gt_bboxes_to_use_for_loss.extend(
                    gt_bbox[:len(positive_boxes[0])])
            not_contributing_anchors = self.xp.concatenate(
                (not_contributing_anchors, not_contributing_boxes[0]), axis=0)

        if len(gt_bboxes_to_use_for_loss) == 0:
            return Variable(self.xp.array(0, dtype=predicted_grids.dtype))

        gt_bboxes_to_use_for_loss = F.stack(gt_bboxes_to_use_for_loss)

        # filter predicted bboxes and only keep bboxes from those regions that actually contain a bbox
        predicted_bbox_points = F.get_item(predicted_bbox_points,
                                           positive_anchor_indices)

        # 3. calculate L1 loss for bbox regression
        loss = F.huber_loss(predicted_bbox_points, gt_bboxes_to_use_for_loss,
                            1)

        # 4. calculate objectness loss
        objectness_labels = self.xp.zeros(len(objectness_scores),
                                          dtype=self.xp.int32)
        objectness_labels[not_contributing_anchors] = -1
        objectness_labels[positive_anchor_indices] = 1

        objectness_loss = F.softmax_cross_entropy(
            objectness_scores,
            objectness_labels,
            ignore_label=-1,
        )

        return F.mean(loss), objectness_loss
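
Example 13 keeps only the predicted boxes at positive anchor positions by passing an integer index array to F.get_item, so the filtered rows stay differentiable for the Huber loss. A minimal sketch on dummy boxes, again assuming a Chainer version that supports integer-array indexing in get_item:

import numpy as np
import chainer.functions as F
from chainer import Variable

boxes = Variable(np.arange(20, dtype=np.float32).reshape(5, 4))  # five (x0, y0, x1, y1) boxes
positive_anchor_indices = np.array([0, 3], dtype=np.int32)
kept = F.get_item(boxes, positive_anchor_indices)                # rows 0 and 3, still a Variable
print(kept.shape)  # (2, 4)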
Example #14
    def act_and_merge_features(self,
                               xs,
                               ws,
                               vs,
                               ms,
                               gcs=None,
                               get_att_score=False):
        hs = []
        pcs = []
        ass = []  # attention scores

        xp = cuda.get_array_module(xs[0])
        closs = chainer.Variable(xp.array(0, dtype='f'))

        if gcs is None:
            gcs = [None] * len(xs)
        for x, w, v, gc, mask in zip(xs, ws, vs, gcs, ms):
            # print('x', x.shape)
            if w is None and v is None:  # no words were found for devel/test data
                a = xp.zeros((len(x), self.chunk_embed_out_dim), dtype='f')
                pc = np.zeros(len(x), 'i')
                pcs.append(pc)
                h = F.concat((x, a), axis=1)  # (n, dt) @ (n, dc) => (n, dt+dc)
                hs.append(h)
                continue

            if w is not None:
                w = F.dropout(w, self.embed_dropout)

            ## calculate weight for w

            mask_ij = mask[0]
            if self.use_attention:  # wavg or wcon
                mask_i = mask[1]
                # print('w', w.shape)

                w_scores = self.biaffine(
                    F.dropout(x, self.biaffine_dropout),
                    F.dropout(w, self.biaffine_dropout))  # (n, m)
                w_scores = w_scores + mask_ij  # a masked element becomes 0 after softmax operation
                w_weight = F.softmax(w_scores)
                w_weight = w_weight * mask_i  # rows for chars with no candidate words become zero vectors

                # print('ww', w_weight.shape, '\n', w_weight)

            elif self.chunk_pooling_type == constants.AVG:
                w_weight = self.normalize(mask_ij, xp=xp)

            if not self.use_concat and self.chunk_vector_dropout > 0:
                mask_drop = xp.ones(w_weight.shape, dtype='f')
                for i in range(w_weight.shape[0]):
                    if self.chunk_vector_dropout > np.random.rand():
                        mask_drop[i] = xp.zeros(w_weight.shape[1], dtype='f')
                w_weight = w_weight * mask_drop

            ## calculate weight for v

            if self.use_concat:
                mask_ik = mask[2]
                n = x.shape[0]
                wd = self.chunk_embed_dim_merged  #w.shape[1]
                if self.chunk_pooling_type == constants.WCON:
                    ikj_table = mask[3]
                    v_weight0 = F.concat(
                        [
                            F.expand_dims(  # (n, m) -> (n, k)
                                F.get_item(w_weight[i], ikj_table[i]),
                                axis=0) for i in range(n)
                        ],
                        axis=0)
                    # print('mask_ik', mask_ik.shape, '\n', mask_ik)
                    # print('v_weight0', v_weight0.shape, '\n', v_weight0)
                    v_weight0 *= mask_ik
                    # print('ikj_table', ikj_table)

                else:
                    v_weight0 = mask_ik

                v_weight = F.transpose(v_weight0)  # (n,k)
                v_weight = F.expand_dims(v_weight, 2)  # (k,n)
                v_weight = F.broadcast_to(
                    v_weight, (self.chunk_concat_num, n, wd))  # (k,n,wd)
                v_weight = F.concat(v_weight, axis=1)  # (k,n*wd)

                if self.chunk_vector_dropout > 0:
                    mask_drop = xp.ones(v_weight.shape, dtype='f')
                    for i in range(v_weight.shape[0]):
                        if self.chunk_vector_dropout > np.random.rand():
                            mask_drop[i] = xp.zeros(v_weight.shape[1],
                                                    dtype='f')
                    v_weight *= mask_drop

            ## calculate summary vector a
            if self.use_average:  # avg or wavg
                a = F.matmul(w_weight, w)  # (n, m) * (m, dc)  => (n, dc)

            else:  # con or wcon
                v = F.concat(v, axis=1)
                a = v * v_weight
                # print('a', a.shape, a)

            ## get predicted (attended) chunks
            if self.use_attention:  # wavg or wcon
                if self.chunk_pooling_type == constants.WAVG:
                    weight = w_weight
                else:
                    weight = v_weight0
                pc = minmax.argmax(weight, axis=1).data
                if xp is cuda.cupy:
                    pc = cuda.to_cpu(pc)
                pcs.append(pc)

            #     if get_att_score:
            #         ascore = minmax.max(weight, axis=1).data
            #         ass.append(ascore)

            #     ncand = [sum([1 if val >= 0 else 0 for val in raw]) for raw in _mask]
            #     print('pred', pc)
            #     print('gold', gc)
            #     print('ncand', ncand)
            #     print('weight', weight.shape, weight.data)
            #     print('weight')
            #     for i, e in enumerate(weight.data):
            #         print(i, e)

            h = F.concat((x, a), axis=1)  # (n, dt) @ (n, dc) => (n, dt+dc)

            hs.append(h)

        if closs.data == 0:
            closs = None
        else:
            closs /= len(xs)

        if get_att_score:
            return closs, pcs, hs, ass
        else:
            return closs, pcs, hs
Example #15
 def f(x):
     return functions.get_item(x, self.slices)
Example #16
 def f(x):
     y = functions.get_item(x, self.slices)
     return y * y
Example #17
def main(id):
    model_path = "/efs/fMRI_AE/SimpleFCAE_E32D32/model/model_iter_108858"

    gpu = 0
    get_device_from_id(gpu).use()
    """NibDataset
    def __init__(self, directory: str, crop: list):
    """
    crop = [[9, 81], [11, 99], [0, 80]]
    test_dataset = NibDataset("/data/test", crop=crop)

    mask = load_mask_nib("/data/mask/average_optthr.nii", crop)
    """SimpleFCAE_E32D32
    def __init__(self, mask, r: int, in_mask: str, out_mask: str):
    """
    model = Model(mask, 2, "mask", "mask")
    load_npz(model_path, model)
    model.to_gpu()

    # feature_idx = 0
    # feature_idx = (0, 4, 5, 5) # == [0, 9/2, 11/2, 10/2]
    # feature_idx = (0, 1, 1, 1)
    feature_idx = (0, 2, 7, 4)
    resample_size = 100
    batch_size = 10
    noise_level = 0.2

    for i in range(len(test_dataset)):
        if i % 8 != id:
            continue
        print("{:4}/{:4}".format(i, len(test_dataset)))
        subject = test_dataset.get_subject(i)
        frame = test_dataset.get_frame(i)
        test_img = xp.asarray(test_dataset[i])

        resample_remain = resample_size
        resample_processed = 0
        ret = xp.zeros(test_img.shape)
        while resample_remain > 0:
            batch_size_this_loop = min(batch_size, resample_remain)
            resample_remain -= batch_size_this_loop

            batch = xp.broadcast_to(
                test_img, chain((batch_size_this_loop, ), test_img.shape))
            sigma = noise_level / (xp.max(test_img) - xp.min(test_img))
            batch += sigma * xp.random.randn(*batch.shape)

            x = Variable(batch)

            feature = model.extract(x)
            assert feature.shape == (batch_size, 1, 9, 11, 10)
            feature = F.sum(feature, axis=0)
            assert feature.shape == (1, 9, 11, 10)
            feature = F.get_item(feature, feature_idx)
            feature.backward()
            grad = xp.mean(x.grad, axis=0)
            ret = (ret * resample_processed + grad * batch_size_this_loop) / (
                resample_processed + batch_size_this_loop)
            model.cleargrads()

        xp.save(
            "/efs/fMRI_AE/SimpleFCAE_E32D32/grad/sensitivity_map_feature_{}_{}_{}_subject{:03d}_frame{:03d}"
            .format(feature_idx[1], feature_idx[2], feature_idx[3], subject,
                    frame), ret)
Example #18
 def f(x):
     y = functions.get_item(x, self.slices)
     return y * y
Example #19
def mean_dice_coefficient(dice_coefficients, ret_nan=True):
    if ret_nan:
        xp = cuda.get_array_module(dice_coefficients)
        selector = ~xp.isnan(dice_coefficients.data)
        dice_coefficients = F.get_item(dice_coefficients, selector)
    return F.mean(dice_coefficients, keepdims=True)
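
The helper above drops NaN entries before averaging by indexing a Variable with a boolean mask. A small sketch of the same pattern, assuming a Chainer version whose get_item accepts boolean-array indices (which this helper already relies on):

import numpy as np
import chainer.functions as F
from chainer import Variable

dice = Variable(np.array([0.8, np.nan, 0.6], dtype=np.float32))
selector = ~np.isnan(dice.data)                      # boolean mask: [True, False, True]
mean_dice = F.mean(F.get_item(dice, selector), keepdims=True)
print(mean_dice.data)                                # approximately [0.7]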
Example #20
    def CalcLoss(self, xs, ys):
        char_hidden = []
        '''
        wxs = [x[0] for x in xs]
        unk_xs = [x[1] for x in xs]
        '''
        wxs = [
            np.array([source_word_ids.get(w, UNK) for w in x], dtype=np.int32)
            for x in xs
        ]
        unk_words = list(map(lambda x, y: np.array(y)[x == UNK], wxs, xs))
        unk_xs = list(
            map(
                lambda x: np.array([
                    np.array([source_char_ids.get(c, UNK) for c in list(w)],
                             dtype=np.int32) for w in x
                ]), unk_words))
        unk_pos = [np.where(x == UNK)[0] for x in wxs]
        concat_wxs = np.concatenate(wxs)

        wys = [
            np.array([target_word_ids.get(w, UNK) for w in y], dtype=np.int32)
            for y in ys
        ]

        eos = self.xp.array([EOS], 'i')
        ys_in = [F.concat([eos, y], axis=0) for y in wys]
        ys_out = [F.concat([y, eos], axis=0) for y in wys]

        # Both xs and ys_in are lists of arrays.
        exs = sequence_embed(self.embed_x, wxs)
        # Convert an UNK word vector into a char-hidden vector
        exs = list(
            map(
                lambda s, t, u: get_unk_hidden_vector(
                    s, t, u, self.embed_xc, self.char_encoder, char_hidden),
                exs, unk_pos, unk_xs))
        exs_f = exs
        exs_b = [F.get_item(ex, range(len(ex))[::-1]) for ex in exs]
        eys = sequence_embed(self.embed_y, ys_in)

        batch = len(xs)
        # None represents a zero vector in an encoder.
        _, hf = self.encoder_f(None, exs_f)
        _, hb = self.encoder_b(None, exs_b)
        # collection of hidden state vectors
        hb = [F.get_item(h, range(len(h))[::-1]) for h in hb]
        ht = list(map(lambda x, y: F.concat([x, y], axis=1), hf, hb))

        h_list, h_bar_list, c_s_list, z_s_list = self.decoder(None, ht, eys)

        # It is faster to concatenate data before calculating loss
        # because only one matrix multiplication is called.
        os = h_list
        os_len = [len(s) for s in os]
        os_section = np.cumsum(os_len[:-1])
        concat_os = F.concat(os, axis=0)
        concat_os_out = self.W(concat_os)
        concat_ys_out = F.concat(ys_out, axis=0)

        loss_w = 0
        loss_c1 = 0
        loss_c2 = 0

        # If predicted word is UNK
        concat_pred_w = F.argmax(concat_os_out, axis=1)
        #concat_isUNK = concat_pred_w==0

        is_unk = concat_pred_w.data == UNK
        count_unk_with_no_att = 0
        if UNK in concat_pred_w.data:
            print(True)
            ## Plan 2:
            # concatenate everything
            # (total number of words) x 2048
            concat_c_s = F.concat(c_s_list, axis=0)
            concat_h_bar = F.concat(h_bar_list, axis=0)

            c_ss = concat_c_s[is_unk]
            h_bars = concat_h_bar[is_unk]
            c = F.concat([c_ss, h_bars], axis=1)
            ds_hats = F.relu(self.W_hat(c))

            z_s_len = [len(z_s) - 1 for z_s in z_s_list]
            z_s_section = np.cumsum(z_s_len[:-1])
            valid_z_s_section = np.insert(z_s_section, 0, 0)
            abs_z_s_list = [
                z_s_list[i] + valid_z_s_section[i]
                for i in range(len(z_s_list))
            ]
            concat_z_s = F.concat(abs_z_s_list, axis=0)
            z_ss = concat_z_s[is_unk]

            true_wys = concat_ys_out[is_unk]
            #"予想単語==UNK"の各ケースについて個別に処理
            for i, wy in enumerate(true_wys):
                bow = self.xp.array([BOW], 'i')
                wy = int(wy.data)
                print(target_words[wy])
                if wy != UNK and wy != EOS:
                    cys = np.array(
                        [[target_char_ids[c] for c in list(target_words[wy])]],
                        np.int32)
                elif wy == UNK:
                    # this should never happen
                    cys = np.array([[target_char_ids['UNK']]], np.int32)
                elif wy == EOS:
                    cys = np.array([[target_char_ids['BOW']]], np.int32)
                cys_in = [F.concat([bow, y], axis=0) for y in cys]
                cys_out = [F.concat([y, bow], axis=0) for y in cys]
                concat_cys_out = F.concat(cys_out, axis=0)
                ceys = sequence_embed(self.embed_yc, cys_in)
                z_s = int(z_ss[i].data)

                ds_hat = F.reshape(ds_hats[i], (1, 1, ds_hats[i].shape[0]))
                if concat_wxs[z_s] != UNK:
                    # character-based decoder without attention
                    _, cos = self.char_decoder(ds_hat, ceys)
                    print("no attention")
                    concat_cos = F.concat(cos, axis=0)
                    concat_cos_out = self.W_char(concat_cos)
                    loss_c1 = loss_c1 + F.sum(
                        F.softmax_cross_entropy(
                            concat_cos_out, concat_cys_out, reduce='no'))
                    count_unk_with_no_att += 1
                else:
                    # character-based decoder with attention
                    ht = char_hidden[z_s]
                    h_list, h_bar_list, c_s_list, z_s_list = self.char_att_decoder(
                        ds_hat, ht, ceys)
                    print("with attention")
                    concat_cos = F.concat(h_list, axis=0)
                    concat_cos_out = self.W_char(concat_cos)
                    loss_c2 = loss_c2 + F.sum(
                        F.softmax_cross_entropy(
                            concat_cos_out, concat_cys_out, reduce='no'))
        else:
            print(False)
        n_words = concat_ys_out.shape[0]
        n_unk = np.sum(is_unk)
        count_unk_with_att = n_unk - count_unk_with_no_att
        count_kno = n_words - n_unk
        loss_w = F.sum(
            F.softmax_cross_entropy(concat_os_out[is_unk != 1],
                                    concat_ys_out[is_unk != 1],
                                    reduce='no'))
        loss = F.sum(loss_w + Alpha * loss_c1 + Beta * loss_c2) / n_words
        return loss, count_kno, count_unk_with_no_att, count_unk_with_att
Example #21
    def test_get_item_unsupported_advanced_index(self, slices):
        model = chainer.Sequential(lambda x: F.get_item(x, slices=slices))
        x = input_generator.increasing(2, 3, 4)

        with pytest.raises(ValueError):
            export(model, x)
Example #22
 def forward(self, inputs, device):
     x, = inputs
     y = functions.get_item(x, self.slices)
     return y,
Example #23
 def test_multiple_ellipsis(self):
     with self.assertRaises(ValueError):
         functions.get_item(self.x_data, (Ellipsis, Ellipsis))
Example #24
 def test_advanced_indexing(self):
     with self.assertRaises(ValueError):
         functions.get_item(self.x_data, ([0, 0, 0],))
Example #25
 def test_too_many_indices(self):
     with self.assertRaises(type_check.InvalidType):
         functions.get_item(self.x_data, (0, 0, 0, 0))
Example #26
 def test_too_many_indices(self):
     with self.assertRaises(type_check.InvalidType):
         functions.get_item(self.x_data, (0, 0, 0, 0))
Example #27
def forward(enc_words, dec_words, model, ARR):
    """
    Computes the forward pass.
    :param enc_words: input sentences
    :param dec_words: output sentences
    :param model: the model
    :param ARR: either numpy or cuda.cupy
    :return loss: the loss
    """
    # Record the batch size
    batch_size = len(enc_words[0])
    # Reset the gradients stored in the model
    model.reset()
    # Prepare a list used to check which words appear in the input sentences
    enc_key = enc_words.T
    # Convert the sentences fed to the encoder into Variables
    enc_words = [Variable(ARR.array(row, dtype='int32')) for row in enc_words]
    # Run the encoder
    model.encode(enc_words)
    # Initialize the loss
    loss = Variable(ARR.zeros((), dtype='float32'))
    # Feed <eos> to the decoder
    t = Variable(ARR.array([0 for _ in range(batch_size)], dtype='int32'))
    # Run the decoder
    for w in dec_words:
        # Decode one word at a time
        y, att, lambda_ = model.decode(t)
        # Convert the target word into a Variable
        t = Variable(ARR.array(w, dtype='int32'))

        # Take the log_softmax of the word scores computed by the Generative Mode
        s = functions.log_softmax(y)
        # Take the log_softmax of the attention weights
        att_s = functions.log_softmax(att)
        # Pass lambda through a sigmoid so it becomes a value between 0 and 1
        lambda_s = functions.reshape(functions.sigmoid(lambda_),
                                     (batch_size, ))
        # Initialize the Generative Mode loss
        Pg = Variable(ARR.zeros((), dtype='float32'))
        # Initialize the Copy Mode loss
        Pc = Variable(ARR.zeros((), dtype='float32'))
        # Initialize the loss used to learn the balance of lambda
        epsilon = Variable(ARR.zeros((), dtype='float32'))
        # From here, compute the loss for each word in the batch; looping over words with a for statement here is admittedly inelegant...
        counter = 0
        for i, words in enumerate(w):
            # -1 is the label attached to words that are not trained on
            if words != -1:
                # Compute the Generative Mode loss
                Pg += functions.get_item(functions.get_item(
                    s, i), words) * functions.reshape(
                        (1.0 - functions.get_item(lambda_s, i)), ())
                counter += 1
                # If the word we want to output appears in the input sentence
                if words in enc_key[i]:
                    # Compute the Copy Mode loss
                    Pc += functions.get_item(
                        functions.get_item(att_s, i),
                        list(enc_key[i]).index(words)) * functions.reshape(
                            functions.get_item(lambda_s, i), ())
                    # Train lambda to lean toward Copy Mode
                    epsilon += functions.log(functions.get_item(lambda_s, i))
                # If the word we want to output is not in the input sentence
                else:
                    # Train lambda to lean toward Generative Mode
                    epsilon += functions.log(1.0 -
                                             functions.get_item(lambda_s, i))
        # Divide each loss by the batch size and sum them
        Pg *= (-1.0 / np.max([1, counter]))
        Pc *= (-1.0 / np.max([1, counter]))
        epsilon *= (-1.0 / np.max([1, counter]))
        loss += Pg + Pc + epsilon
    return loss
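
The loop above repeatedly nests functions.get_item calls to pick a single log-probability out of a log_softmax output for one batch element and one vocabulary index. Stripped of the copy-mode bookkeeping, the pattern reduces to the following sketch (toy shapes and names):

import numpy as np
from chainer import Variable, functions

y = Variable(np.random.randn(4, 10).astype(np.float32))     # (batch, vocab) decoder scores
s = functions.log_softmax(y)
i, word = 2, 7                                               # batch element and target word id
log_p = functions.get_item(functions.get_item(s, i), word)   # log P(word | element i)
print(log_p.shape)  # (), a differentiable scalar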