def check_invalid_reduce(self, x, t):
     with chainer.using_config('use_cudnn', self.use_cudnn):
         with self.assertRaises(ValueError):
             functions.softmax_cross_entropy(
                 x, t, self.normalize, self.cache_score,
                 reduce='unknown_reduce_type',
                 enable_double_backprop=self.enable_double_backprop)
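For contrast with the invalid value tested above, a minimal sketch (not part of the original test) of the two reduce modes F.softmax_cross_entropy does accept, 'mean' and 'no':

import numpy as np
import chainer.functions as F

x = np.random.randn(4, 3).astype(np.float32)  # logits for 4 samples, 3 classes
t = np.array([0, 2, 1, 0], dtype=np.int32)    # integer class labels
loss_mean = F.softmax_cross_entropy(x, t, reduce='mean')  # scalar mean loss (default)
loss_each = F.softmax_cross_entropy(x, t, reduce='no')    # per-sample losses, shape (4,)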
Example No. 2
def train_one(gen, dis, optimizer_gen, optimizer_dis, x_batch, y_batch, gpu_device):
    batch_size = len(x_batch)
    if gpu_device is None:
        xp = np
    else:
        xp = cuda.cupy
    # train generator
    y = Variable(xp.asarray(y_batch))
    t = Variable(xp.asarray(y_batch + 1))
    z = Variable(xp.random.uniform(-1, 1, (batch_size, LATENT_SIZE)).astype(np.float32))
    x = gen((z, y))
    y1 = dis(x)
    loss_gen = F.softmax_cross_entropy(y1, t)
    loss_dis = F.softmax_cross_entropy(y1, Variable(xp.zeros(batch_size).astype(np.int32)))
    # train discriminator
    y2 = dis(Variable(xp.asarray(x_batch)))
    loss_dis += F.softmax_cross_entropy(y2, t)

    optimizer_gen.zero_grads()
    loss_gen.backward()
    optimizer_gen.update()

    optimizer_dis.zero_grads()
    loss_dis.backward()
    optimizer_dis.update()

    return (float(loss_gen.data), float(loss_dis.data))
Example No. 3
def train_one(gen, dis, optimizer_gen, optimizer_dis, x_batch, gpu_device):
    batch_size = len(x_batch)
    if gpu_device is None:
        xp = np
    else:
        xp = cuda.cupy
    # train generator
    z = Variable(xp.random.uniform(-1, 1, (batch_size, LATENT_SIZE)).astype(np.float32))
    x = gen(z)
    y1 = dis(x)
    loss_gen = F.softmax_cross_entropy(y1, Variable(xp.zeros(batch_size).astype(np.int32)))
    loss_dis = F.softmax_cross_entropy(y1, Variable(xp.ones(batch_size).astype(np.int32)))
    # train discriminator
    x2 = Variable(xp.asarray(np.reshape(x_batch, (batch_size, 1, 28, 28))))
    y2 = dis(x2)
    loss_dis += F.softmax_cross_entropy(y2, Variable(xp.zeros(batch_size).astype(np.int32)))

    optimizer_gen.zero_grads()
    loss_gen.backward()
    optimizer_gen.update()

    optimizer_dis.zero_grads()
    loss_dis.backward()
    optimizer_dis.update()

    return (loss_gen.data, loss_dis.data)
Example No. 4
    def __call__(self, jline, eline):
        gh = []
        self.H.reset_state()
        for w in jline:
            wid = self.jvocab[w]
            x_k = self.embedx(Variable(np.array([wid], dtype=np.int32)))
            h = self.H(x_k)
            gh.append(np.copy(h.data[0]))

        x_k = self.embedx(Variable(np.array([self.jvocab[EOS]], dtype=np.int32)))
        tx = Variable(np.array([self.evocab[eline[0]]], dtype=np.int32))
        h = self.H(x_k)
        ct = Variable(mk_ct(gh, h.data[0]))
        h2 = F.tanh(self.Wc1(ct) + self.Wc2(h))
        accum_loss = F.softmax_cross_entropy(self.W(h2), tx)

        for i in range(len(eline)):
            wid = self.evocab[eline[i]]
            x_k = self.embedy(Variable(np.array([wid], dtype=np.int32)))
            next_w = eline[i + 1] if i < len(eline) - 1 else EOS
            next_wid = self.evocab[next_w]
            tx = Variable(np.array([next_wid], dtype=np.int32))
            h = self.H(x_k)
            ct = Variable(mk_ct(gh, h.data[0]))
            h2 = F.tanh(self.Wc1(ct) + self.Wc2(h))
            loss = F.softmax_cross_entropy(self.W(h2), tx)
            accum_loss += loss

        return accum_loss
Example No. 5
    def forward(self, x_data, y_data, train=True):
        x = Variable(x_data, volatile=not train)
        t = Variable(y_data, volatile=not train)

        h = F.relu(self.bn1_1(self.conv1_1(x)))
        h = F.relu(self.bn1_2(self.conv1_2(h)))
        h = F.max_pooling_2d(h, 2, stride=2)
        h = F.dropout(h, ratio=0.25, train=train)

        h = F.relu(self.bn2_1(self.conv2_1(h)))
        h = F.relu(self.bn2_2(self.conv2_2(h)))
        h = F.max_pooling_2d(h, 2, stride=2)
        h = F.dropout(h, ratio=0.25, train=train)

        h = F.relu(self.bn3_1(self.conv3_1(h)))
        h = F.relu(self.bn3_2(self.conv3_2(h)))
        h = F.relu(self.bn3_3(self.conv3_3(h)))
        h = F.relu(self.bn3_4(self.conv3_4(h)))
        h = F.max_pooling_2d(h, 2, stride=2)
        h = F.dropout(h, ratio=0.25, train=train)

        h = F.dropout(F.relu(self.fc4(h)), train=train, ratio=0.5)
        h = F.dropout(F.relu(self.fc5(h)), train=train, ratio=0.5)
        h = self.fc6(h)

        if train:
            return F.softmax_cross_entropy(h, t), F.accuracy(h, t)
        else:
            return F.softmax_cross_entropy(h, t), F.accuracy(h, t), h
Example No. 6
    def forward(self, x_data, y_data, train=True):
        x = Variable(x_data, volatile=not train)
        t = Variable(y_data, volatile=not train)

        h = self.prelu1_1(self.bn1_1(self.conv1_1(x)))
        h = self.prelu1_2(self.bn1_2(self.conv1_2(h)))
        h = F.max_pooling_2d(h, 2, stride=2)

        h = self.prelu2_1(self.bn2_1(self.conv2_1(h)))
        h = self.prelu2_2(self.bn2_2(self.conv2_2(h)))
        h = F.max_pooling_2d(h, 2, stride=2)

        h = self.prelu3_1(self.conv3_1(h))
        h = self.prelu3_2(self.conv3_2(h))
        h = self.prelu3_3(self.conv3_3(h))
        h = F.max_pooling_2d(h, 2, stride=1)

        h = self.prelu4_1(self.conv4_1(h))
        h = self.prelu4_2(self.conv4_2(h))
        h = self.prelu4_3(self.conv4_3(h))
        h = F.max_pooling_2d(h, 2, stride=1)

        h = self.prelu5_1(self.conv5_1(h))
        h = self.prelu5_2(self.conv5_2(h))
        h = self.prelu5_3(self.conv5_3(h))
        h = F.max_pooling_2d(h, 2, stride=1)

        h = F.dropout(self.prelu6(self.fc6(h)), train=train, ratio=0.5)
        h = F.dropout(self.prelu7(self.fc7(h)), train=train, ratio=0.5)
        h = self.fc8(h)

        if train:
            return F.softmax_cross_entropy(h, t), F.accuracy(h, t)
        else:
            return F.softmax_cross_entropy(h, t), F.accuracy(h, t), h
Example No. 7
    def forward(self, x_data, y_data, train=True):
        x = chainer.Variable(x_data, volatile=not train)
        t = chainer.Variable(y_data, volatile=not train)

        h = F.max_pooling_2d(
            F.relu(self.norm1(self.conv1(x))),  3, stride=2, pad=1)
        h = F.max_pooling_2d(
            F.relu(self.norm2(self.conv2(h))), 3, stride=2, pad=1)

        h = self.inc3a(h)
        h = self.inc3b(h)
        h = self.inc3c(h)
        h = self.inc4a(h)

        a = F.average_pooling_2d(h, 5, stride=3)
        a = F.relu(self.norma(self.conva(a)))
        a = F.relu(self.norma2(self.lina(a)))
        a = self.outa(a)
        a = F.softmax_cross_entropy(a, t)

        h = self.inc4b(h)
        h = self.inc4c(h)
        h = self.inc4d(h)

        b = F.average_pooling_2d(h, 5, stride=3)
        b = F.relu(self.normb(self.convb(b)))
        b = F.relu(self.normb2(self.linb(b)))
        b = self.outb(b)
        b = F.softmax_cross_entropy(b, t)

        h = self.inc4e(h)
        h = self.inc5a(h)
        h = F.average_pooling_2d(self.inc5b(h), 7)
        h = self.out(h)
        return 0.3 * (a + b) + F.softmax_cross_entropy(h, t), F.accuracy(h, t)
Example No. 8
    def forward(self, x_data, y_data, train=True, models=None):
        VGG_mini = models["VGG_mini"]
        VGG_mini2 = models["VGG_mini2"]
        VGG_mini3 = models["VGG_mini3"]
        
        x = Variable(x_data, volatile=not train)
        t = Variable(y_data, volatile=not train)

        h = F.relu(self.conv1_1(x))
        h = F.relu(self.conv1_2(h))
        h = F.relu(self.conv1_3(h))
        h = F.relu(self.conv1_4(h))
        h = F.max_pooling_2d(h, 2, stride=2)
        h = F.dropout(h, ratio=0.25, train=train)
        
        h = F.relu(self.conv1_5(h))
        h = F.max_pooling_2d(h, 2, stride=2)
        h = F.dropout(h, ratio=0.25, train=train)
        
        h = self.fc(h)

        if train:
            return F.softmax_cross_entropy(h, t), F.accuracy(h, t)
        else:
            # return F.softmax_cross_entropy(h, t), F.accuracy(h, t), h
            return F.softmax_cross_entropy(h, t), F.accuracy(h, t)
Example No. 9
 def test_variable_assertion(self):
     wrong_inst_class_weight = chainer.Variable(
         numpy.array([0, 0], dtype='f'))
     with self.assertRaises(ValueError):
         functions.softmax_cross_entropy(
             self.x, self.t, class_weight=wrong_inst_class_weight,
             enable_double_backprop=self.enable_double_backprop)
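For contrast, a minimal sketch (values are made up) of a valid class_weight, which is expected to be an ndarray of per-class weights rather than a chainer.Variable:

import numpy as np
import chainer.functions as F

x = np.random.randn(4, 2).astype(np.float32)
t = np.array([0, 1, 1, 0], dtype=np.int32)
w = np.array([1.0, 2.0], dtype=np.float32)  # weight errors on class 1 twice as heavily
loss = F.softmax_cross_entropy(x, t, class_weight=w)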
Example No. 10
def d_norm(flag, dis, img1, img2):
    yl = dis(img1, img2)
    if flag == 0:
        return F.softmax_cross_entropy(yl, Variable(xp.zeros(batchsize, dtype=np.int32)))
    elif flag == 1:
        return F.softmax_cross_entropy(yl, Variable(xp.ones(batchsize, dtype=np.int32)))
    else:
        raise ValueError("norm flag should be either 0 or 1")
Example No. 11
    def test_NNET_train(self):
        struct = 'w2vec(/project/nakamura-lab01/Work/truong-dq/chainer/vidnn/exp/word2vec_truecase_200/vectors.bin):lstm(200-2):linear(2-2)'
        # struct = 'embed(2-200):lstm(200-2):linear(2-2)'
        nnet = NNET_Model.parse_structure(struct)

        # Testing variable
        test_var = chainer.Variable(np.asarray([1], dtype=np.int32))
        output_before_train = nnet(test_var).data


        inp = chainer.Variable(np.asarray([1], dtype=np.int32))
        target = chainer.Variable(np.asarray([0], dtype=np.int32))
        output = nnet(inp)
        loss = F.softmax_cross_entropy(output, target)
        optimizer = optimizers.SGD(lr=0.1)
        optimizer.setup(nnet)
        optimizer.zero_grads()
        loss.backward()
        optimizer.update()
        
        inp = chainer.Variable(np.asarray([0], dtype=np.int32))
        target = chainer.Variable(np.asarray([0], dtype=np.int32))
        output = nnet(inp)
        loss = F.softmax_cross_entropy(output, target)
        optimizer = optimizers.SGD(lr=0.1)
        optimizer.setup(nnet)
        optimizer.zero_grads()
        loss.backward()
        optimizer.update()

        
        nnet.save('test_output')
        nnet_2 = NNET_Model.load('test_output')
        nnet.forget_history()
        nnet_2.forget_history()

        np.testing.assert_equal(nnet[0][1].W.data, nnet_2[0][1].W.data)
        np.testing.assert_equal(nnet[1][1].upward.W.data, nnet_2[1][1].upward.W.data)
        np.testing.assert_equal(nnet[1][1].lateral.W.data, nnet_2[1][1].lateral.W.data)
        np.testing.assert_equal(nnet[1][1].upward.b.data, nnet_2[1][1].upward.b.data)

        output_after_train = nnet(test_var).data
        output_after_load = nnet_2(test_var).data
        
       
        after_first_layer_nnet = nnet[0][1](test_var)
        after_first_layer_nnet_2 = nnet_2[0][1](test_var)
        np.testing.assert_equal(after_first_layer_nnet.data, after_first_layer_nnet_2.data)
        
        after_first_layer_nnet.volatile = False
        after_first_layer_nnet_2.volatile = False
        after_second_layer_nnet = nnet[1][1](after_first_layer_nnet)
        after_second_layer_nnet_2 = nnet_2[1][1](after_first_layer_nnet_2)
        np.testing.assert_equal(after_second_layer_nnet.data, after_second_layer_nnet_2.data)
        
        assert (output_before_train != output_after_train).any()
        assert (output_before_train != output_after_load).any()
        np.testing.assert_equal(output_after_train, output_after_load)
Example No. 12
    def forward(self, *inputs):
        batch = len(inputs) // 6
        lefts = inputs[0: batch]
        rights = inputs[batch: batch * 2]
        dests = inputs[batch * 2: batch * 3]
        labels = inputs[batch * 3: batch * 4]
        sequences = inputs[batch * 4: batch * 5]
        leaf_labels = inputs[batch * 5: batch * 6]

        inds = numpy.argsort([-len(l) for l in lefts])
        # Sort all arrays in descending order and transpose them
        lefts = F.transpose_sequence([lefts[i] for i in inds])
        rights = F.transpose_sequence([rights[i] for i in inds])
        dests = F.transpose_sequence([dests[i] for i in inds])
        labels = F.transpose_sequence([labels[i] for i in inds])
        sequences = F.transpose_sequence([sequences[i] for i in inds])
        leaf_labels = F.transpose_sequence(
            [leaf_labels[i] for i in inds])

        batch = len(inds)
        maxlen = len(sequences)

        loss = 0
        count = 0
        correct = 0

        stack = self.xp.zeros(
            (batch, maxlen * 2, self.n_units), self.xp.float32)
        for i, (word, label) in enumerate(zip(sequences, leaf_labels)):
            batch = word.shape[0]
            es = self.leaf(word)
            ds = self.xp.full((batch,), i, self.xp.int32)
            y = self.label(es)
            loss += F.softmax_cross_entropy(y, label, normalize=False) * batch
            count += batch
            predict = self.xp.argmax(y.array, axis=1)
            correct += (predict == label.array).sum()

            stack = thin_stack.thin_stack_set(stack, ds, es)

        for left, right, dest, label in zip(lefts, rights, dests, labels):
            l, stack = thin_stack.thin_stack_get(stack, left)
            r, stack = thin_stack.thin_stack_get(stack, right)
            o = self.node(l, r)
            y = self.label(o)
            batch = l.shape[0]
            loss += F.softmax_cross_entropy(y, label, normalize=False) * batch
            count += batch
            predict = self.xp.argmax(y.array, axis=1)
            correct += (predict == label.array).sum()

            stack = thin_stack.thin_stack_set(stack, dest, o)

        loss /= count
        reporter.report({'loss': loss}, self)
        reporter.report({'total': count}, self)
        reporter.report({'correct': correct}, self)
        return loss
Example No. 13
def train_dcgan_labeled(images, gen, dis):
    o_gen = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_dis = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_gen.setup(gen)
    o_dis.setup(dis)
    o_gen.add_hook(chainer.optimizer.WeightDecay(0.00001))
    o_dis.add_hook(chainer.optimizer.WeightDecay(0.00001))

    zeros = Variable(xp.zeros(batchsize, dtype=np.int32))
    ones = Variable(xp.ones(batchsize, dtype=np.int32))

    for epoch in tqdm(range(n_epoch)):
        # discriminator
        # 0: from dataset
        # 1: from noise

        # train generator
        z = xp.random.uniform(-1, 1, (batchsize, nz), dtype=np.float32)
        z = Variable(z)
        x = gen(z)
        yl = dis(x)
        L_gen = F.softmax_cross_entropy(yl, zeros)
        L_dis = F.softmax_cross_entropy(yl, ones)

        # train discriminator
        x = generate_data(images)
        yl = dis(x)
        L_dis += F.softmax_cross_entropy(yl, zeros)

        o_gen.zero_grads()
        L_gen.backward()
        o_gen.update()

        o_dis.zero_grads()
        L_dis.backward()
        o_dis.update()

        if epoch % image_save_interval == 0 and epoch > 0:
            z = zvis
            z[50:, :] = xp.random.uniform(-1, 1, (50, nz), dtype=np.float32)
            z = Variable(z)
            x = gen(z, test=True)

            filename = '{}/vis_{}.png'.format(out_image_dir, epoch)
            generate_and_save(filename, x.data.get())

            path = join(out_model_dir, "dcgan_model_dis_{}.h5".format(epoch))
            serializers.save_hdf5(path, dis)

            path = join(out_model_dir, "dcgan_model_gen_{}.h5".format(epoch))
            serializers.save_hdf5(path, gen)

            path = join(out_model_dir, "dcgan_state_dis_{}.h5".format(epoch))
            serializers.save_hdf5(path, o_dis)

            path = join(out_model_dir, "dcgan_state_gen_{}.h5".format(epoch))
            serializers.save_hdf5(path, o_gen)
Example No. 14
    def __call__(self, x, t):
        self.clear()
        test = not self.train

        h = F.max_pooling_2d(
            F.relu(self.norm1(self.conv1(x), test=test)),  3, stride=2, pad=1)
        h = F.max_pooling_2d(
            F.relu(self.norm2(self.conv2(h), test=test)), 3, stride=2, pad=1)

        h = self.inc3a(h)
        h = self.inc3b(h)
        h = self.inc3c(h)
        h = self.inc4a(h)

        a = F.average_pooling_2d(h, 5, stride=3)
        a = F.relu(self.norma(self.conva(a), test=test))
        a = F.relu(self.norma2(self.lina(a), test=test))
        a = self.outa(a)
        self.loss1 = F.softmax_cross_entropy(a, t)

        h = self.inc4b(h)
        h = self.inc4c(h)
        h = self.inc4d(h)

        b = F.average_pooling_2d(h, 5, stride=3)
        b = F.relu(self.normb(self.convb(b), test=test))
        b = F.relu(self.normb2(self.linb(b), test=test))
        b = self.outb(b)
        self.loss2 = F.softmax_cross_entropy(b, t)

        h = self.inc4e(h)
        h = self.inc5a(h)
        h = F.average_pooling_2d(self.inc5b(h), 7)
        h = self.out(h)
        self.loss3 = F.softmax_cross_entropy(h, t)

        self.loss = 0.3 * (self.loss1 + self.loss2) + self.loss3
        self.accuracy = F.accuracy(h, t)
        shishi = F.softmax(h)
#        kankan = shishi.data[0].tolist()
#        categories = np.loadtxt("labels.txt", str, delimiter="\t")
#        top_k = 10
#        prediction = zip(kankan,categories)
#        for feifei in categories:
#          print(feifei)
#        prediction.sort(cmp=lambda x,y: cmp(x[0],y[0]),reverse=True)
#        cuowushuchu = ('cuowuchushu.txt','w')
#        for rank,(score,name) in enumerate(prediction[:3],start=1):
#          print('#%d | %s | %4.1f%%' % (rank,name,score * 100))
#        print('\n')
        
#        for rank,(score,name) in enumerate(prediction[:2],start=1):
#          feijigege = score * 100
#          cuowushuchu.write(str(name)+' '+str(feijigege))
#        cuowushuchu.close()
        return shishi  
Example No. 15
    def check_value_check(self, x_data, t_data, use_cudnn):
        x = chainer.Variable(x_data)
        t = chainer.Variable(t_data)

        if self.valid:
            # Check if it throws nothing
            functions.softmax_cross_entropy(x, t, use_cudnn)
        else:
            with self.assertRaises(ValueError):
                functions.softmax_cross_entropy(x, t, use_cudnn)
Example No. 16
    def forward(self, x_img, x_doc, y_data, train=True):
        x_img = cuda.cupy.asarray(x_img)
        x_doc = cuda.cupy.asarray(x_doc)
        y_data = cuda.cupy.asarray(y_data)

        img, doc, t = Variable(x_img), Variable(x_doc), Variable(y_data)

        h = F.relu(self.conv1(img))
        h = F.local_response_normalization(
            F.max_pooling_2d(h, 3, stride=2), n=5)
        h = F.relu(self.conv2_reduce(h))
        h = F.relu(self.conv2(h))
        h = F.max_pooling_2d(
            F.local_response_normalization(h, n=5), 3, stride=2)

        h = self.inc3a(h)
        h = self.inc3b(h)
        h = F.max_pooling_2d(h, 3, stride=2)
        h = self.inc4a(h)

        l = F.average_pooling_2d(h, 5, stride=3)
        l = F.relu(self.loss1_conv(l))
        l = F.relu(self.loss1_fc1(l))
        l = self.loss1_fc2(l)
        self.loss1 = F.softmax_cross_entropy(l, t)

        h = self.inc4b(h)
        h = self.inc4c(h)
        h = self.inc4d(h)

        l = F.average_pooling_2d(h, 5, stride=3)
        l = F.relu(self.loss2_conv(l))
        l = F.relu(self.loss2_fc1(l))
        l = self.loss2_fc2(l)
        self.loss2 = F.softmax_cross_entropy(l, t)

        h = self.inc4e(h)
        h = F.max_pooling_2d(h, 3, stride=2)
        h = self.inc5a(h)
        h = self.inc5b(h)

        h = F.average_pooling_2d(h, 7, stride=1)
        h = self.loss3_fc1(F.dropout(h, 0.4, train=train))

        h2 = F.relu(self.doc_fc1(F.dropout(doc, train=train)))
        h2 = F.relu(self.doc_fc2(h2))
        b = F.relu(self.bi1(h, h2))
        h = self.loss3_fc2(b)

        self.loss3 = F.softmax_cross_entropy(h, t)

        if train:
            return 0.3 * (self.loss1 + self.loss2) + self.loss3
        else:
            return F.accuracy(h, t)
Example No. 17
    def forward(self, x_data, y_data, train=True):
        x, t = Variable(x_data), Variable(y_data)
        h = F.max_pooling_2d(F.relu(self.bn1(self.conv1(x))), 3, stride=2)
        h = F.max_pooling_2d(F.relu(self.bn2(self.conv2(h))), 3, stride=2)
        h = F.max_pooling_2d(F.relu(self.conv3(h)), 3, stride=2)
        h = self.fc4(h)

        if train:
            return F.softmax_cross_entropy(h, t), F.accuracy(h, t)
        else:
            return F.softmax_cross_entropy(h, t), F.accuracy(h, t), h
Example No. 18
    def compute_loss(self, input_ids, input_mask, token_type_ids,
                     start_positions, end_positions):
        (start_logits, end_logits) = self.__call__(
            input_ids, input_mask, token_type_ids)
        start_loss = F.softmax_cross_entropy(start_logits, start_positions)
        end_loss = F.softmax_cross_entropy(end_logits, end_positions)
        total_loss = (start_loss + end_loss) / 2.0
        chainer.report({'loss': total_loss.array}, self)

        accuracy = (check_answers(start_logits, start_positions) *
                    check_answers(end_logits, end_positions, start_positions)).mean()
        chainer.report({'accuracy': accuracy}, self)
        return total_loss
Example No. 19
    def train(self, x_img, x_doc, y_data, regression, gpu=True, useImage=True, useDoc=True):
        xp = cuda.cupy if gpu else np
        x_img = xp.asarray(x_img)
        x_doc = xp.asarray(x_doc)
        y_data = xp.asarray(y_data)
        img, doc, t = Variable(x_img), Variable(x_doc), Variable(y_data)
        y = self.model.forward(img, doc, regression=regression, useImage=useImage, useDoc=useDoc)

        # calc loss
        if useImage:
            if regression:
                a = self.toLog(y["a"], xp)
                b = self.toLog(y["b"], xp)
                h = self.toLog(y["h"], xp)
                t = self.toLog(t, xp)
                self.loss1 = F.mean_squared_error(a, t)
                self.loss2 = F.mean_squared_error(b, t)
                self.loss3 = F.mean_squared_error(h, t)
            else:
                a = y["a"]
                b = y["b"]
                h = y["h"]
                self.loss1 = F.softmax_cross_entropy(a, t)
                self.loss2 = F.softmax_cross_entropy(b, t)
                self.loss3 = F.softmax_cross_entropy(h, t)
            loss = 0.3 * (self.loss1 + self.loss2) + self.loss3
        else:
            if regression:
                h = self.toLog(y, xp)
                t = self.toLog(t, xp)
                self.loss1 = F.mean_squared_error(h, t)
            else:
                h = y
                self.loss1 = F.softmax_cross_entropy(y, t)
            loss = self.loss1


        # random select optimizer
        rnd = np.random.randint(0, len(self.myOptimizers))
        self.optimizer = self.myOptimizers[rnd]
        self.optimizer.setup(self.model)
        self.optimizer.zero_grads()
        loss.backward()
        self.optimizer.update()

        if regression:
            h = np.array(cuda.to_cpu(h.data)).reshape((len(h)))
            t = np.array(cuda.to_cpu(t.data)).reshape((len(t)))
            return loss.data, h, t
        else:
            return loss.data, F.accuracy(h, t).data, []
Example No. 20
    def forward(self, x_data, y_data, train=True):
        x = chainer.Variable(x_data, volatile=not train)
        t = chainer.Variable(y_data, volatile=not train)

        h = F.relu(self.conv1(x))
        h = F.local_response_normalization(
            F.max_pooling_2d(h, 3, stride=2), n=5)

        h = F.relu(self.conv2_reduce(h))
        h = F.relu(self.conv2(h))
        h = F.max_pooling_2d(
            F.local_response_normalization(h, n=5), 3, stride=2)

        h = self.inc3a(h)
        h = self.inc3b(h)
        h = F.max_pooling_2d(h, 3, stride=2)
        h = self.inc4a(h)

        if train:
            loss1 = F.average_pooling_2d(h, 5, stride=3)
            loss1 = F.relu(self.loss1_conv(loss1))
            loss1 = F.relu(self.loss1_fc1(loss1))
            loss1 = self.loss1_fc2(loss1)
            loss1 = F.softmax_cross_entropy(loss1, t)

        h = self.inc4b(h)
        h = self.inc4c(h)
        h = self.inc4d(h)

        if train:
            loss2 = F.average_pooling_2d(h, 5, stride=3)
            loss2 = F.relu(self.loss2_conv(loss2))
            loss2 = F.relu(self.loss2_fc1(loss2))
            loss2 = self.loss2_fc2(loss2)
            loss2 = F.softmax_cross_entropy(loss2, t)

        h = self.inc4e(h)
        h = F.max_pooling_2d(h, 3, stride=2)
        h = self.inc5a(h)
        h = self.inc5b(h)

        h = F.dropout(F.average_pooling_2d(h, 7, stride=1), 0.4, train=train)
        h = self.loss3_fc(h)
        loss3 = F.softmax_cross_entropy(h, t)

        if train:
            loss = 0.3 * (loss1 + loss2) + loss3
        else:
            loss = loss3
        accuracy = F.accuracy(h, t)
        return loss, accuracy
Example No. 21
File: model.py  Project: musyoku/NLP
	def train_word_embedding_batch(self, char_ids_batch):
		xp = self.xp
		word_vec = self.encode_word_batch(char_ids_batch)
		batchsize = char_ids_batch.shape[0]
		char_ids_batch = char_ids_batch.T

		# reconstruction loss
		loss_reconstruction = 0
		self.word_decoder_lstm.reset_state()
		prev_y = None
		for i in xrange(char_ids_batch.shape[0]):
			if prev_y is None:
				prev_y = Variable(xp.zeros((batchsize, self.char_embed_size), dtype=xp.float32))
			dec_in = F.concat((word_vec, prev_y))
			y = self.word_decoder_lstm(dec_in, test=False)
			target = Variable(char_ids_batch[i])
			if self.gpu_enabled:
				target.to_gpu()
			loss = F.softmax_cross_entropy(y, target)
			prev_y = self.embed_id(target)
			loss_reconstruction += loss

		self.zero_grads_generator()
		loss_reconstruction.backward()
		self.update_generator()

		# adversarial loss
		## 0: from encoder
		## 1: from noise
		real_z = self.sample_z(batchsize, self.word_embed_size)
		fake_z = word_vec
		y_fake = self.discriminator(fake_z, test=False)

		## train generator
		loss_generator = F.softmax_cross_entropy(y_fake, Variable(xp.ones((batchsize,), dtype=xp.int32)))

		self.zero_grads_generator()
		loss_generator.backward()
		self.update_generator()

		# train discriminator
		y_real = self.discriminator(real_z, test=False)
		loss_discriminator = F.softmax_cross_entropy(y_fake, Variable(xp.zeros((batchsize,), dtype=xp.int32)))
		loss_discriminator += F.softmax_cross_entropy(y_real, Variable(xp.ones((batchsize,), dtype=xp.int32)))

		self.optimizer_discriminator.zero_grads()
		loss_discriminator.backward()
		self.optimizer_discriminator.update()

		return float(loss_reconstruction.data), float(loss_generator.data), float(loss_discriminator.data)
Example No. 22
    def __call__(self, x, t):
        h = F.relu(self.conv1(x))
        h = F.local_response_normalization(
            F.max_pooling_2d(h, 3, stride=2), n=5)
        h = F.relu(self.conv2_reduce(h))
        h = F.relu(self.conv2(h))
        h = F.max_pooling_2d(
            F.local_response_normalization(h, n=5), 3, stride=2)

        h = self.inc3a(h)
        h = self.inc3b(h)
        h = F.max_pooling_2d(h, 3, stride=2)
        h = self.inc4a(h)

        l = F.average_pooling_2d(h, 5, stride=3)
        l = F.relu(self.loss1_conv(l))
        l = F.relu(self.loss1_fc1(l))
        l = self.loss1_fc2(l)
        loss1 = F.softmax_cross_entropy(l, t)

        h = self.inc4b(h)
        h = self.inc4c(h)
        h = self.inc4d(h)

        l = F.average_pooling_2d(h, 5, stride=3)
        l = F.relu(self.loss2_conv(l))
        l = F.relu(self.loss2_fc1(l))
        l = self.loss2_fc2(l)
        loss2 = F.softmax_cross_entropy(l, t)

        h = self.inc4e(h)
        h = F.max_pooling_2d(h, 3, stride=2)
        h = self.inc5a(h)
        h = self.inc5b(h)

        h = F.average_pooling_2d(h, 7, stride=1)
        h = self.loss3_fc(F.dropout(h, 0.4))
        loss3 = F.softmax_cross_entropy(h, t)

        loss = 0.3 * (loss1 + loss2) + loss3
        accuracy = F.accuracy(h, t)

        chainer.report({
            'loss': loss,
            'loss1': loss1,
            'loss2': loss2,
            'loss3': loss3,
            'accuracy': accuracy
        }, self)
        return loss
Example No. 23
    def check_value_check(self, x_data, t_data, use_cudnn):
        x = chainer.Variable(x_data)
        t = chainer.Variable(t_data)

        with chainer.using_config('use_cudnn', use_cudnn):
            if self.valid:
                # Check if it throws nothing
                functions.softmax_cross_entropy(
                    x, t, enable_double_backprop=self.enable_double_backprop)
            else:
                with self.assertRaises(ValueError):
                    functions.softmax_cross_entropy(
                        x, t,
                        enable_double_backprop=self.enable_double_backprop)
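As an aside, a minimal sketch (not from the test suite) of the 'use_cudnn' configuration toggled above; in Chainer v2 and later it accepts 'always', 'auto', or 'never':

import numpy as np
import chainer
import chainer.functions as F

x = np.random.randn(2, 3).astype(np.float32)
t = np.array([1, 0], dtype=np.int32)
with chainer.using_config('use_cudnn', 'never'):  # force the non-cuDNN code path
    loss = F.softmax_cross_entropy(x, t)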
Example No. 24
    def __call__(self, z, x):
        batchsize = z.data.shape[0]

        # generate
        x_gen = self.gen(z)
        y_gen = self.dis(x_gen)
        loss_gen = F.softmax_cross_entropy(y_gen, Variable(self._get_zeros(batchsize)))
        loss_dis = F.softmax_cross_entropy(y_gen, Variable(self._get_ones(batchsize)))

        # discriminate
        y = self.dis(x)
        loss_dis += F.softmax_cross_entropy(y, Variable(self._get_zeros(batchsize)))

        return loss_gen, loss_dis
Example No. 25
    def __call__(self, x, t):
        h = F.relu(self['conv1/7x7_s2'](x))
        h = F.local_response_normalization(
            F.max_pooling_2d(h, 3, stride=2), n=5, alpha=(1e-4)/5, k=1)
        h = F.relu(self['conv2/3x3_reduce'](h))
        h = F.relu(self['conv2/3x3'](h))
        h = F.max_pooling_2d(F.local_response_normalization(
            h, n=5, alpha=(1e-4)/5, k=1), 3, stride=2)

        h = self.call_inception(h, 'inception_3a')
        h = self.call_inception(h, 'inception_3b')
        h = F.max_pooling_2d(h, 3, stride=2)
        h = self.call_inception(h, 'inception_4a')

        l = F.average_pooling_2d(h, 5, stride=3)
        l = F.relu(self['loss1/conv'](l))
        l = F.dropout(F.relu(self['loss1/fc'](l)), 0.7, train=self.train)
        l = self['loss1/classifier'](l)
        loss1 = F.softmax_cross_entropy(l, t)

        h = self.call_inception(h, 'inception_4b')
        h = self.call_inception(h, 'inception_4c')
        h = self.call_inception(h, 'inception_4d')

        l = F.average_pooling_2d(h, 5, stride=3)
        l = F.relu(self['loss2/conv'](l))
        l = F.dropout(F.relu(self['loss2/fc'](l)), 0.7, train=self.train)
        l = self['loss2/classifier'](l)
        loss2 = F.softmax_cross_entropy(l, t)

        h = self.call_inception(h, 'inception_4e')
        h = F.max_pooling_2d(h, 3, stride=2)
        h = self.call_inception(h, 'inception_5a')
        h = self.call_inception(h, 'inception_5b')

        h = F.average_pooling_2d(h, 7, stride=1)
        h = self['loss3/classifier'](F.dropout(h, 0.4, train=self.train))
        loss3 = F.softmax_cross_entropy(h, t)

        loss = 0.3 * (loss1 + loss2) + loss3
        accuracy = F.accuracy(h, t)
        chainer.report({
            'loss': loss,
            'loss1': loss1,
            'loss2': loss2,
            'loss3': loss3,
            'accuracy': accuracy
        }, self)
        return loss
Example No. 26
def train_gen(gen, dis, optimizer_gen, optimizer_dis, x_batch, gpu_device):
    batch_size = len(x_batch)
    if gpu_device is None:
        xp = np
    else:
        xp = cuda.cupy
    z = Variable(xp.random.uniform(-1, 1, (batch_size, LATENT_SIZE)).astype(np.float32))
    x = gen(z)
    y1 = dis(x)
    loss_gen = F.softmax_cross_entropy(y1, Variable(xp.zeros(batch_size).astype(np.int32)))
    loss_dis = F.softmax_cross_entropy(y1, Variable(xp.ones(batch_size).astype(np.int32)))
    optimizer_gen.zero_grads()
    loss_gen.backward()
    optimizer_gen.update()
    return loss_gen.data
Example No. 27
    def __call__(self, x, t):
        test = not self.train
        finetune = self.finetune

        h = self.call_conv_bn_sc(x, 'conv1/7x7_s2', test=test, finetune=finetune)
        h = F.max_pooling_2d(h, 3, stride=2, pad=1)
        h = self.call_conv_bn_sc(h, 'conv2/3x3_reduce', test=test, finetune=finetune)
        h = self.call_conv_bn_sc(h, 'conv2/3x3', test=test)
        h = F.max_pooling_2d(h, 3, stride=2, pad=1)
        h = self.call_inception_bn(h, 'inception_3a', test=test, finetune=finetune)
        h = self.call_inception_bn(h, 'inception_3b', test=test, finetune=finetune)
        h = self.call_inception_bn(h, 'inception_3c', test=test, finetune=finetune)

        a = F.average_pooling_2d(h, 5, stride=3)
        a = self.call_conv_bn_sc(a, 'loss1/conv', test=test, finetune=finetune)
        a = self.call_fc_bn_sc(a, 'loss1/fc', test=test, finetune=finetune)
        a = self['loss1/classifier'](a)
        loss1 = F.softmax_cross_entropy(a, t)

        h = self.call_inception_bn(h, 'inception_4a', test=test, finetune=finetune)
        h = self.call_inception_bn(h, 'inception_4b', test=test, finetune=finetune)
        h = self.call_inception_bn(h, 'inception_4c', test=test, finetune=finetune)
        h = self.call_inception_bn(h, 'inception_4d', test=test, finetune=finetune)
        h = self.call_inception_bn(h, 'inception_4e', test=test, finetune=finetune)

        b = F.average_pooling_2d(h, 5, stride=3)
        b = self.call_conv_bn_sc(b, 'loss2/conv', test=test, finetune=finetune)
        b = self.call_fc_bn_sc(b, 'loss2/fc', test=test, finetune=finetune)
        b = self['loss2/classifier'](b)
        loss2 = F.softmax_cross_entropy(b, t)

        h = self.call_inception_bn(h, 'inception_5a', test=test, finetune=finetune)
        h = self.call_inception_bn(h, 'inception_5b', test=test, finetune=finetune)

        h = F.average_pooling_2d(h, 7, stride=1)
        h = self['loss3/classifier'](h)
        loss3 = F.softmax_cross_entropy(h, t)

        loss = 0.3 * (loss1 + loss2) + loss3
        accuracy = F.accuracy(h, t)
        chainer.report({
            'loss': loss,
            'loss1': loss1,
            'loss2': loss2,
            'loss3': loss3,
            'accuracy': accuracy
        }, self)
        return loss
Example No. 28
    def update_parameter_by_meta_learner(
            self, model_params, loss, 
            x_l0, x_l1, y_l):

        # Forward meta-learner
        namedparams = model_params
        for i, elm in enumerate(namedparams.items()):  # parameter-loop
            k, p = elm
            with cuda.get_device_from_id(self.device):
                shape = p.shape
                xp = cuda.get_array_module(p.data)

                x = p.grad
                grad = xp.reshape(x, (np.prod(shape), ))
                meta_learner = self.meta_learners[i]
                g = meta_learner(Variable(grad))  # forward
                w = p - F.reshape(g, shape)
                self.model_params[k] = w

        # Train meta-learner with main objective
        y_pred = self.model(x_l0, self.model_params)
        loss_ce = F.softmax_cross_entropy(y_pred, y_l)
        
        self.cleargrads()  # need to clear W's grads because of loss_rec.backward
        for meta_learner in self.meta_learners:
            meta_learner.cleargrads()
        loss_ce.backward(retain_grad=True)
        for opt in self.opt_meta_learners:
            opt.update()

        loss_ce.unchain_backward()  # TODO: is this the proper place to unchain?
Example No. 29
    def forward(self, xs, ys):
        xs = [x[::-1] for x in xs]

        eos = self.xp.array([EOS], numpy.int32)
        ys_in = [F.concat([eos, y], axis=0) for y in ys]
        ys_out = [F.concat([y, eos], axis=0) for y in ys]

        # Both xs and ys_in are lists of arrays.
        exs = sequence_embed(self.embed_x, xs)
        eys = sequence_embed(self.embed_y, ys_in)

        batch = len(xs)
        # None represents a zero vector in an encoder.
        hx, cx, _ = self.encoder(None, None, exs)
        _, _, os = self.decoder(hx, cx, eys)

        # It is faster to concatenate data before calculating loss
        # because only one matrix multiplication is called.
        concat_os = F.concat(os, axis=0)
        concat_ys_out = F.concat(ys_out, axis=0)
        loss = F.sum(F.softmax_cross_entropy(
            self.W(concat_os), concat_ys_out, reduce='no')) / batch

        chainer.report({'loss': loss}, self)
        n_words = concat_ys_out.shape[0]
        perp = self.xp.exp(loss.array * batch / n_words)
        chainer.report({'perp': perp}, self)
        return loss
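As a side note, a small sketch (hypothetical numbers) of the perplexity arithmetic above: loss is the summed per-token cross entropy divided by the batch size, so loss * batch / n_words is the mean per-token loss, and its exponential is the perplexity:

import numpy as np

loss = 1.5      # hypothetical loss per sentence pair, as reported above
batch = 64      # hypothetical batch size
n_words = 1200  # hypothetical total number of target tokens
perp = np.exp(loss * batch / n_words)  # exp of the mean per-token cross entropy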
Example No. 30
def forward(x_data, y_data, model, train=True):
        # Neural net architecture
        #x, t = chainer.Variable(x_data), chainer.Variable(y_data)
        t = chainer.Variable(y_data)
        x = {}
        for n in range(500):
            x[n] = chainer.Variable(x_data[n])
        h = {}
        initial_V = {}
        initial_V_relu = {}
        for nameint in range(len(l_name)-2):
            initial_V[nameint] = model[l_name[nameint]](x[nameint])
            #initial_V_relu[nameint] = F.relu(initial_V[nameint])
            #initial_V_relu[nameint] = F.sigmoid(initial_V[nameint])
            initial_V_relu[nameint] = F.tanh(initial_V[nameint])
            #h[nameint] = F.dropout(F.relu(initial_V[nameint]), train=train)
            #h[nameint] = F.dropout(F.sigmoid(initial_V[nameint]), train=train)
            h[nameint] = F.dropout(F.tanh(initial_V[nameint]), train=train)
            #h[nameint] = F.relu(model[l_name[nameint]](x[nameint]))
        #h6 = F.dropout(F.relu(model.l501(Returnharray(h))), train=train)
        #h6 = F.dropout(F.sigmoid(model.l501(Returnharray(h))), train=train)
        h6 = F.dropout(F.tanh(model.l501(Returnharray(h))), train=train)
        y = model.l502(h6)
        y_pre = y.data.argmax(axis=1)
        return F.softmax_cross_entropy(y, t), F.accuracy(y, t), y_pre, initial_V, initial_V_relu
Example No. 31
def main():
    # load MNIST images
    images, labels = dataset.load_train_images()

    # config
    config = model.config

    # settings
    max_epoch = 1000
    num_trains_per_epoch = 5000
    num_validation_data = 10000
    batchsize = 128

    # seed
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # save validation accuracy per epoch
    csv_results = []

    # create semi-supervised split
    training_images, training_labels, validation_images, validation_labels = dataset.split_data(
        images, labels, num_validation_data, seed=args.seed)

    # training
    progress = Progress()
    for epoch in xrange(1, max_epoch):
        progress.start_epoch(epoch, max_epoch)
        sum_loss = 0

        for t in xrange(num_trains_per_epoch):
            # sample from data distribution
            image_batch, label_batch = dataset.sample_data(training_images,
                                                           training_labels,
                                                           batchsize,
                                                           binarize=False)
            distribution = model.discriminate(image_batch, apply_softmax=False)
            loss = F.softmax_cross_entropy(distribution,
                                           model.to_variable(label_batch))
            sum_loss += float(loss.data)

            model.backprop(loss)

            if t % 10 == 0:
                progress.show(t, num_trains_per_epoch, {})

        model.save(args.model_dir)
        train_accuracy = compute_accuracy(training_images, training_labels)
        validation_accuracy = compute_accuracy(validation_images,
                                               validation_labels)

        progress.show(
            num_trains_per_epoch, num_trains_per_epoch, {
                "loss": sum_loss / num_trains_per_epoch,
                "accuracy (validation)": validation_accuracy,
                "accuracy (train)": train_accuracy,
            })

        # write accuracy to csv
        csv_results.append(
            [epoch, validation_accuracy,
             progress.get_total_time()])
        data = pd.DataFrame(csv_results)
        data.columns = ["epoch", "accuracy", "min"]
        data.to_csv("{}/result.csv".format(args.model_dir))
Example No. 32
 def __call__(self, x, t, train=True):
     y = self.fwd(x, train)
     return F.softmax_cross_entropy(y, t), F.accuracy(y, t)
Example No. 33
    def forward(self,
                x_img,
                x_doc,
                y_data,
                train=True,
                regression=False,
                predict=False,
                gpu=True):
        test = not train

        xp = cuda.cupy if gpu else np
        x_img = xp.asarray(x_img)
        y_data = xp.asarray(y_data)

        img, t = Variable(x_img), Variable(y_data)

        if regression and not predict:
            t = self.toLog(t)
            #t.data = cuda.cupy.asarray(t.data,  dtype=cuda.cupy.float32).reshape((20,1))

        h = F.max_pooling_2d(F.relu(self.norm1(self.conv1(img), test=test)),
                             3,
                             stride=2,
                             pad=1)
        h = F.max_pooling_2d(F.relu(self.norm2(self.conv2(h), test=test)),
                             3,
                             stride=2,
                             pad=1)

        h = self.inc3a(h)
        h = self.inc3b(h)
        h = self.inc3c(h)
        h = self.inc4a(h)

        if not predict:
            a = F.average_pooling_2d(h, 5, stride=3)
            a = F.relu(self.norma(self.conva(a), test=test))
            a = F.relu(self.norma2(self.lina(a), test=test))
            a = self.outa(a)
            if regression:
                #a = self.toLog(a)
                self.loss1 = F.mean_squared_error(a, t)
            else:
                self.loss1 = F.softmax_cross_entropy(a, t)

        h = self.inc4b(h)
        h = self.inc4c(h)
        h = self.inc4d(h)

        if not predict:
            b = F.average_pooling_2d(h, 5, stride=3)
            b = F.relu(self.normb(self.convb(b), test=test))
            b = F.relu(self.normb2(self.linb(b), test=test))
            b = self.outb(b)
            if regression:
                #b = self.toLog(b)
                self.loss2 = F.mean_squared_error(b, t)
            else:
                self.loss2 = F.softmax_cross_entropy(b, t)

        h = self.inc4e(h)
        h = self.inc5a(h)
        h = F.average_pooling_2d(self.inc5b(h), 7)
        h = self.out(h)

        if predict:
            #t.data = cuda.cupy.asarray(t.data,  dtype=cuda.cupy.float32).reshape((20,1))
            #myloss = F.mean_squared_error(h, t)
            return h
        if regression:
            h = self.toLog(h)
            self.loss3 = F.mean_squared_error(h, t)
        else:
            self.loss3 = F.softmax_cross_entropy(h, t)

        if train or regression:
            h = np.array(cuda.to_cpu(h.data)).reshape((len(h)))
            t = np.array(cuda.to_cpu(t.data)).reshape((len(t)))
            #print(h)
            #print(t)
            return 0.3 * (self.loss1 + self.loss2) + self.loss3, np.corrcoef(
                h, t)
        else:
            return F.accuracy(h, t)
Example No. 34
 def forward(self):
     x = chainer.Variable(self.x)
     t = chainer.Variable(self.t)
     return functions.softmax_cross_entropy(x,
                                            t,
                                            enable_double_backprop=False)
Example No. 35
 def __call__(self, x, t):
     loss = F.softmax_cross_entropy(self.predict(x), t)
     chainer.report({'loss': loss / t.shape[0]}, self)
     return loss
Example No. 36
def main():
    # load MNIST images
    images, labels = dataset.load_train_images()

    # config
    config = adgm.config

    # settings
    max_epoch = 1000
    num_trains_per_epoch = 500
    batchsize_l = 100
    batchsize_u = 100
    alpha = 1

    # seed
    np.random.seed(args.seed)
    if args.gpu_device != -1:
        cuda.cupy.random.seed(args.seed)

    # save validation accuracy per epoch
    csv_results = []

    # create semi-supervised split
    num_validation_data = 10000
    num_labeled_data = 100
    num_types_of_label = 10
    training_images_l, training_labels_l, training_images_u, validation_images, validation_labels = dataset.create_semisupervised(
        images,
        labels,
        num_validation_data,
        num_labeled_data,
        num_types_of_label,
        seed=args.seed)
    print training_labels_l

    # init weightnorm layers
    if config.use_weightnorm:
        print "initializing weight normalization layers ..."
        images_l, label_onehot_l, label_id_l = dataset.sample_labeled_data(
            training_images_l, training_labels_l, batchsize_l, config.ndim_x,
            config.ndim_y)
        images_u = dataset.sample_unlabeled_data(training_images_u,
                                                 batchsize_u, config.ndim_x)
        adgm.compute_lower_bound(images_l, label_onehot_l, images_u)

    # training
    progress = Progress()
    for epoch in xrange(1, max_epoch):
        progress.start_epoch(epoch, max_epoch)
        sum_lower_bound_l = 0
        sum_lower_bound_u = 0
        sum_loss_classifier = 0

        for t in xrange(num_trains_per_epoch):
            # sample from data distribution
            images_l, label_onehot_l, label_ids_l = dataset.sample_labeled_data(
                training_images_l, training_labels_l, batchsize_l,
                config.ndim_x, config.ndim_y)
            images_u = dataset.sample_unlabeled_data(training_images_u,
                                                     batchsize_u,
                                                     config.ndim_x)

            # lower bound loss
            lower_bound, lb_labeled, lb_unlabeled = adgm.compute_lower_bound(
                images_l, label_onehot_l, images_u)
            loss_lower_bound = -lower_bound

            # classification loss
            a_l = adgm.encode_x_a(images_l, False)
            unnormalized_y_distribution = adgm.encode_ax_y_distribution(
                a_l, images_l, softmax=False)
            loss_classifier = alpha * F.softmax_cross_entropy(
                unnormalized_y_distribution, adgm.to_variable(label_ids_l))

            # backprop
            adgm.backprop(loss_classifier + loss_lower_bound)

            sum_lower_bound_l += float(lb_labeled.data)
            sum_lower_bound_u += float(lb_unlabeled.data)
            sum_loss_classifier += float(loss_classifier.data)
            progress.show(t, num_trains_per_epoch, {})

        adgm.save(args.model_dir)

        # validation
        images_l, _, label_ids_l = dataset.sample_labeled_data(
            validation_images, validation_labels, num_validation_data,
            config.ndim_x, config.ndim_y)
        images_l_segments = np.split(images_l, num_validation_data // 500)
        label_ids_l_segments = np.split(label_ids_l,
                                        num_validation_data // 500)
        sum_accuracy = 0
        for images_l, label_ids_l in zip(images_l_segments,
                                         label_ids_l_segments):
            y_distribution = adgm.encode_x_y_distribution(images_l,
                                                          softmax=True,
                                                          test=True)
            accuracy = F.accuracy(y_distribution,
                                  adgm.to_variable(label_ids_l))
            sum_accuracy += float(accuracy.data)
        validation_accuracy = sum_accuracy / len(images_l_segments)

        progress.show(
            num_trains_per_epoch, num_trains_per_epoch, {
                "lb_u": sum_lower_bound_u / num_trains_per_epoch,
                "lb_l": sum_lower_bound_l / num_trains_per_epoch,
                "loss_spv": sum_loss_classifier / num_trains_per_epoch,
                "accuracy": validation_accuracy,
            })

        # write accuracy to csv
        csv_results.append([epoch, validation_accuracy])
        data = pd.DataFrame(csv_results)
        data.columns = ["epoch", "accuracy"]
        data.to_csv("{}/result.csv".format(args.model_dir))
Example No. 37
 def __call__(self, x, t):
     y = self.predictor(x)
     loss = F.softmax_cross_entropy(y, t)
     accuracy = F.accuracy(y, t)
     report({'loss': loss, 'accuracy': accuracy}, self)
     return loss
Example No. 38
 def __call__(self, x, t, train=True):
     y = self.predictor(x, train)
     self.loss = F.softmax_cross_entropy(y, t)
     self.accuracy = F.accuracy(y, t)
     return self.loss
Example No. 39
    def trainBatch(self, encSents, decSents, args):
        """main training"""
        ###encoder step
        encEmbed = self.getEmbeddings(encSents, args)
        hy, cy, ys = self.encNStepLSTM(hx=None, cx=None, xs=encEmbed)
        encOut = F.pad_sequence(ys) #[batch, max(sentlen), Dim]

        ###decoder step
        decEmbed = self.getEmbeddings(decSents, args) # still a list of embeddings
        decEmbed = F.pad_sequence(decEmbed).transpose([1, 0, 2]) # pad, then reshape to [sentLen, batch, Dim]

        decode_step = len(decEmbed) - 1
        decoderOutList = [0] * decode_step
        lstmStateList = [0] * decode_step
        firstInput = chainer.Variable(xp.zeros(hy[0].shape, dtype=xp.float32)) # initial (non-embedding) input for the first decLSTM step
        for i in range(decode_step):
            # prepare the non-embedding input to decLSTM and the decLSTM state
            if i == 0: # first decoder step
                self.set_state([cy[0], hy[0]])
                anoInput = firstInput
            else:
                self.set_state(lstmStateList[i - 1])
                anoInput = decoderOutList[i - 1]
            hOut = self.decLSTM(F.concat([decEmbed[i], anoInput], 1)) # decoder LSTM output
            lstmStateList[i] = self.get_state()
            decoderOutList[i] = self.attention(hOut, encOut, args) # decoder LSTM output after attention; decoder outputs, decode_step * [batch, Dim]

        total_loss = chainer.Variable(xp.zeros((), dtype=xp.float32))
        proc = 0
        correct = 0
        incorrect = 0
        ### output layer
        correctLabels = F.pad_sequence(decSents, padding=-1).T.array # TODO: rewrite this more cleanly; the whole function is messy. Since we padded with -1, adding 1 would make 0 the EOS token, right?
        for i in range(decode_step):
            oVector = self.decOut(F.dropout(decoderOutList[i], args.dropout_rate))
            correctLabel = correctLabels[i + 1]

            proc += (xp.count_nonzero(correctLabel + 1)) ### TODO: counting zeros would drop the unk token; adding 1 keeps everything >= 1
            # always divide by the minibatch size
            closs = F.softmax_cross_entropy(
                oVector, correctLabel, normalize=False) # why normalize=False? apparently because of the padding
            # this is the unnormalized loss; cf. the seq2seq-attn code
            #total_loss_val += closs.data * cMBSize
            #if train_mode > 0:  # only backward on training data
            total_loss += closs
            # we want the actual number of correct predictions
            t_correct = 0
            t_incorrect = 0
            # always evaluate on dev data; evaluate training data according to the option
            # if train_mode == 0 or args.doEvalAcc > 0:
            # array of predicted word IDs (CuPy)
            pred_arr = oVector.data.argmax(axis=1)
            # positions where the prediction equals the target become 0
            # => correct positions are 0, so subtract them from the total ### xp.count_nonzero() counts the mistakes?
            t_correct = (correctLabel.size -
                         xp.count_nonzero(correctLabel - pred_arr)) # t_correct: number of correct predictions
            # subtract the number correct from the number of positions that need predicting  # the +1 broadcasts
            t_incorrect = xp.count_nonzero(correctLabel + 1) - t_correct # xp.count_nonzero() is the number of positions that need predicting, so t_incorrect is the number of mistakes
            correct += t_correct
            incorrect += t_incorrect
        ####
        #total_loss.backward()

        return total_loss, (correct, incorrect, decode_step, proc)
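A brief aside, as a minimal sketch (values are made up): with Chainer's default ignore_label=-1, targets padded with -1 contribute no loss, and normalize=False divides the summed loss by the batch size rather than by the number of non-ignored tokens, which fits the -1 padding used above.

import numpy as np
import chainer.functions as F

x = np.random.randn(3, 5).astype(np.float32)
t = np.array([2, 4, -1], dtype=np.int32)  # -1 is the default ignore_label (padding)
loss_token_mean = F.softmax_cross_entropy(x, t)                    # mean over the 2 non-ignored tokens
loss_batch_mean = F.softmax_cross_entropy(x, t, normalize=False)   # sum divided by the batch size, 3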
Example No. 40
    def __call__(self, imgs, masks, labels, bboxes, scales):
        """Forward FCIS and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.
        * :math:`H` is the image height.
        * :math:`W` is the image width.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~chainer.Variable): A variable with a batch of images.
            masks (~chainer.Variable): A batch of masks.
                Its shape is :math:`(N, R, H, W)`.
            labels (~chainer.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            bboxes (~chainer.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            scales (float or ~chainer.Variable): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            chainer.Variable:
            Scalar loss variable.
            This is the sum of losses for Region Proposal Network and
            the head module.

        """
        if isinstance(masks, chainer.Variable):
            masks = masks.array
        if isinstance(labels, chainer.Variable):
            labels = labels.array
        if isinstance(bboxes, chainer.Variable):
            bboxes = bboxes.array
        if isinstance(scales, chainer.Variable):
            scales = scales.array
        scales = cuda.to_cpu(scales)

        batch_size, _, H, W = imgs.shape
        img_size = (H, W)
        assert img_size == masks.shape[2:]

        if any(len(b) == 0 for b in bboxes):
            return chainer.Variable(self.xp.array(0, dtype=np.float32))

        rpn_features, roi_features = self.fcis.extractor(imgs)
        rpn_locs, rpn_scores, rois, roi_indices, anchor = self.fcis.rpn(
            rpn_features, img_size, scales)
        rpn_locs = F.concat(rpn_locs, axis=0)
        rpn_scores = F.concat(rpn_scores, axis=0)

        gt_rpn_locs = []
        gt_rpn_labels = []
        for bbox in bboxes:
            gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
                bbox, anchor, img_size)
            if cuda.get_array_module(rpn_locs.array) != np:
                gt_rpn_loc = cuda.to_gpu(gt_rpn_loc)
                gt_rpn_label = cuda.to_gpu(gt_rpn_label)
            gt_rpn_locs.append(gt_rpn_loc)
            gt_rpn_labels.append(gt_rpn_label)
            del gt_rpn_loc, gt_rpn_label
        gt_rpn_locs = self.xp.concatenate(gt_rpn_locs, axis=0)
        gt_rpn_labels = self.xp.concatenate(gt_rpn_labels, axis=0)

        batch_indices = range(batch_size)
        sample_rois = []
        sample_roi_indices = []
        gt_roi_masks = []
        gt_roi_labels = []
        gt_roi_locs = []

        for batch_index, mask, label, bbox in \
                zip(batch_indices, masks, labels, bboxes):
            roi = rois[roi_indices == batch_index]
            sample_roi, gt_roi_mask, gt_roi_label, gt_roi_loc = \
                self.proposal_target_creator(
                    roi, mask, label, bbox, self.loc_normalize_mean,
                    self.loc_normalize_std, self.mask_size)
            del roi
            sample_roi_index = self.xp.full((len(sample_roi), ),
                                            batch_index,
                                            dtype=np.int32)
            sample_rois.append(sample_roi)
            sample_roi_indices.append(sample_roi_index)
            del sample_roi, sample_roi_index
            gt_roi_masks.append(gt_roi_mask)
            gt_roi_labels.append(gt_roi_label)
            gt_roi_locs.append(gt_roi_loc)
            del gt_roi_mask, gt_roi_label, gt_roi_loc
        sample_rois = self.xp.concatenate(sample_rois, axis=0)
        sample_roi_indices = self.xp.concatenate(sample_roi_indices, axis=0)
        gt_roi_masks = self.xp.concatenate(gt_roi_masks, axis=0)
        gt_roi_labels = self.xp.concatenate(gt_roi_labels, axis=0)
        gt_roi_locs = self.xp.concatenate(gt_roi_locs, axis=0)

        roi_ag_seg_scores, roi_ag_locs, roi_cls_scores, _, _ = self.fcis.head(
            roi_features, sample_rois, sample_roi_indices, img_size,
            gt_roi_labels)

        # RPN losses
        rpn_loc_loss = _fast_rcnn_loc_loss(rpn_locs, gt_rpn_locs,
                                           gt_rpn_labels, self.rpn_sigma)
        rpn_cls_loss = F.softmax_cross_entropy(rpn_scores, gt_rpn_labels)

        if self.n_ohem_sample is None:
            n_roi = roi_ag_locs.shape[0]
            gt_roi_fg_labels = (gt_roi_labels > 0).astype(np.int)
            roi_locs = roi_ag_locs[self.xp.arange(n_roi), gt_roi_fg_labels]
            roi_loc_loss = _fast_rcnn_loc_loss(roi_locs, gt_roi_locs,
                                               gt_roi_labels, self.roi_sigma)
            roi_cls_loss = F.softmax_cross_entropy(roi_cls_scores,
                                                   gt_roi_labels)
            roi_mask_loss = F.softmax_cross_entropy(
                roi_ag_seg_scores, gt_roi_masks, normalize=False) \
                * 10.0 / self.mask_size / self.mask_size
        else:
            # Losses for outputs of the head
            roi_loc_loss, roi_cls_loss, roi_mask_loss = _ohem_loss(
                roi_ag_locs, roi_cls_scores, roi_ag_seg_scores, gt_roi_locs,
                gt_roi_labels, gt_roi_masks, self.n_ohem_sample,
                self.roi_sigma, self.mask_size)

        loss = rpn_loc_loss + rpn_cls_loss \
            + roi_loc_loss + roi_cls_loss + roi_mask_loss
        chainer.reporter.report(
            {
                'rpn_loc_loss': rpn_loc_loss,
                'rpn_cls_loss': rpn_cls_loss,
                'roi_loc_loss': roi_loc_loss,
                'roi_cls_loss': roi_cls_loss,
                'roi_mask_loss': roi_mask_loss,
                'loss': loss,
            }, self)

        return loss
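A hedged side note on the RPN classification loss above: by default, Chainer's F.softmax_cross_entropy treats targets equal to -1 as ignored (ignore_label=-1), which is how anchors marked as "don't care" by the anchor target creator drop out of rpn_cls_loss. A minimal sketch of that behaviour, assuming plain NumPy inputs:

import numpy as np
import chainer.functions as F

scores = np.array([[2.0, 0.5],
                   [0.1, 1.5],
                   [1.0, 1.0]], dtype=np.float32)  # (n_anchor, 2) background/foreground scores
labels = np.array([1, 0, -1], dtype=np.int32)      # -1 marks an ignored anchor

loss = F.softmax_cross_entropy(scores, labels)     # only the first two anchors contribute
print(loss.array)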
Exemplo n.º 41
0
 def forward(self):
     x = chainer.Variable(self.x)
     t = chainer.Variable(self.t)
     return functions.softmax_cross_entropy(x, t, self.use_cudnn)
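This test snippet passes use_cudnn as a positional argument, which follows an older Chainer signature; in recent releases cuDNN usage is selected through the configuration system instead. A minimal sketch, assuming a reasonably current Chainer:

import numpy as np
import chainer
import chainer.functions as F

x = np.random.randn(4, 3).astype(np.float32)
t = np.array([0, 2, 1, 0], dtype=np.int32)

with chainer.using_config('use_cudnn', 'never'):  # 'always' / 'auto' / 'never'
    loss = F.softmax_cross_entropy(x, t)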
 def __call__(self, x, y):
     return F.softmax_cross_entropy(self.fwd(x), y)
Exemplo n.º 43
0
        # below: compute the exact accuracy on the test set
        test_loss = 0
        test_accuracy = 0
        miss_train_copy = miss_train_total.copy()
        miss_test_copy = miss_test_total.copy()
        correct_train_copy = correct_train_total.copy()
        correct_test_copy = correct_test_total.copy()

        for i in range(0, N_test, batch_size_predict):
            x = Variable(cuda.to_gpu(x_test[i:i +
                                            batch_size_predict]))  # evaluation image data
            t = Variable(cuda.to_gpu(t_test[i:i + batch_size_predict]))  # evaluation labels
            y = model.predict(x)
            model.zerograds()
            loss = F.softmax_cross_entropy(y, t)

            # from here on, identify the misclassified samples
            acc, data, data2, index, pred, corre, correct_index = accuracy(
                y, t)
            acc.to_cpu()
            data.to_cpu()
            data2.to_cpu()
            index.to_cpu()
            pred.to_cpu()
            corre.to_cpu()
            correct_index.to_cpu()
            t.to_cpu()
            x.to_cpu()
            for j in range(len(data.data)):
                miss_test_total[data.data[j]] += 1
Exemplo n.º 44
0
    def forward(self, word_list, gold_op_list, unary_limit):
        is_training = gold_op_list is not None

        # check args
        if len(word_list) < 1:
            raise ValueError('Word list is empty.')
        if is_training:
            n_shift = 0
            n_binary = 0
            for op, _ in gold_op_list:
                if op == OP_SHIFT: n_shift += 1
                if op == OP_BINARY: n_binary += 1
            if n_shift != len(word_list) or n_binary != len(word_list) - 1:
                raise ValueError(
                    'Invalid operation number: SHIFT=%d (required: %d), BINARY=%d (required: %d)'
                    % (n_shift, n_binary, len(word_list), len(word_list) - 1))
            if gold_op_list[-1] != (OP_FINISH, None):
                raise ValueError('Last operation is not OP_FINISH.')

        # initial values
        QUEUE_ZEROS = XP.fzeros((1, self.n_queue))
        STACK_ZEROS = XP.fzeros((1, self.n_stack))
        SRSTATE_ZEROS = XP.fzeros((1, self.n_srstate))
        NEG_INF = -1e20

        # queue encoding
        q_list = []
        qc = QUEUE_ZEROS
        q = QUEUE_ZEROS

        for text, wid in reversed(word_list):
            qc, q = self.net_encoder(qc, XP.iarray([wid]), q)
            q_list.insert(0, (text, q))

        # estimate
        s_list = []
        zc = SRSTATE_ZEROS
        z = SRSTATE_ZEROS
        unary_chain = 0
        if is_training:
            loss = XP.fzeros(())

        for i in itertools.count():
            text, q = q_list[0] if q_list else ('', QUEUE_ZEROS)
            t1, sc1, s1 = s_list[-1] if s_list else (None, STACK_ZEROS,
                                                     STACK_ZEROS)
            t2, sc2, s2 = s_list[-2] if len(s_list) >= 2 else (None,
                                                               STACK_ZEROS,
                                                               STACK_ZEROS)
            t3, sc3, s3 = s_list[-3] if len(s_list) >= 3 else (None,
                                                               STACK_ZEROS,
                                                               STACK_ZEROS)

            zc, z = self.net_sr(zc, q, s1, z)
            o = self.net_operation(z)

            if is_training:
                loss += functions.softmax_cross_entropy(
                    o, XP.iarray([gold_op_list[i][0]]))
                o_argmax = gold_op_list[i][0]
            else:
                o_filter = [0.0 for _ in range(NUM_OP)]
                filtered = 0
                if not q_list:
                    o_filter[OP_SHIFT] = NEG_INF
                    filtered += 1
                if not s_list or unary_chain >= unary_limit:
                    o_filter[OP_UNARY] = NEG_INF
                    filtered += 1
                if len(s_list) < 2:
                    o_filter[OP_BINARY] = NEG_INF
                    filtered += 1
                if q_list or len(s_list) > 1:
                    o_filter[OP_FINISH] = NEG_INF
                if filtered == NUM_OP:
                    raise RuntimeError('No possible operation!')

                o += XP.farray([o_filter])
                o_argmax = int(cuda.to_cpu(o.data.argmax(1)))

            if o_argmax == OP_SHIFT:
                t0 = Tree(None, [text])
                sc0, s0 = (STACK_ZEROS, self.net_shift(q, s1, z))
                q_list.pop(0)
                unary_chain = 0
                label = self.net_semiterminal(s0)
            elif o_argmax == OP_UNARY:
                t0 = Tree(None, [t1])
                sc0, s0 = self.net_unary(sc1, q, s1, s2, z)
                s_list.pop()
                unary_chain += 1
                label = self.net_phrase(s0)
            elif o_argmax == OP_BINARY:
                t0 = Tree(None, [t2, t1])
                sc0, s0 = self.net_binary(sc1, sc2, q, s1, s2, s3, z)
                s_list.pop()
                s_list.pop()
                unary_chain = 0
                label = self.net_phrase(s0)
            else:  # OP_FINISH
                break

            if is_training:
                loss += functions.softmax_cross_entropy(
                    label, XP.iarray([gold_op_list[i][1]]))
                label_argmax = gold_op_list[i][1]
            else:
                label_argmax = int(cuda.to_cpu(label.data.argmax(1)))

            t0.set_label(label_argmax)
            s_list.append((t0, sc0, s0))
            '''
      if is_training:
        o_est = int(cuda.to_cpu(o.data.argmax(1)))
        label_est = int(cuda.to_cpu(label.data.argmax(1)))
        trace('%c %c gold=%d-%2d, est=%d-%2d, stack=%2d, queue=%2d' % (
            '*' if o_est == gold_op_list[i][0] else ' ',
            '*' if label_est == gold_op_list[i][1] else ' ',
            gold_op_list[i][0], gold_op_list[i][1],
            o_est, label_est,
            len(s_list), len(q_list)))
      '''

        if is_training:
            return loss
        else:
            # return the single remaining tree; multiple leftover subtrees indicate an error.
            t0, _, __ = s_list.pop()
            if s_list:
                raise RuntimeError('There exist multiple subtrees!')
            return t0
Exemplo n.º 45
0
def train(args):
    vocab = Vocabulary.from_conll(args.train, args.vocab)
    train_dataset = [conll_to_train(x, vocab) for x in read_conll(args.train)]
    dev_dataset = [conll_to_train(x, vocab) for x in read_conll(args.dev)]

    parser = Parser(args.vocab, args.embed, args.hidden, args.depth)
    if args.gpu >= 0:
        parser.to_gpu()

    opt = optimizers.AdaGrad(lr=0.01)
    opt.setup(parser)
    opt.add_hook(optimizer.GradientClipping(10))
    opt.add_hook(optimizer.WeightDecay(0.0001))

    for epoch in range(args.epoch):
        random.shuffle(train_dataset)

        parser.zerograds()
        loss = XP.fzeros(())

        for i, data in enumerate(train_dataset):
            trace('epoch %3d: train sample %6d:' % (epoch + 1, i + 1))
            parent_scores, root_scores = parser.forward(data)
            if len(data) > 1:
                parent_scores = functions.split_axis(parent_scores, len(data),
                                                     0)
            else:
                parent_scores = (parent_scores, )

            root = -1
            for j, (p_scores, (wid,
                               parent)) in enumerate(zip(parent_scores, data)):
                if parent == -1:
                    trace('  %3d: root' % j)
                    root = j
                else:
                    parent_est = p_scores.data.argmax()
                    trace('%c %3d -> %3d (%3d)' %
                          ('*' if parent == parent_est else ' ', j, parent_est,
                           parent))
                    loss += functions.softmax_cross_entropy(
                        p_scores, XP.iarray([parent]))

            root_est = root_scores.data.argmax()
            trace('ROOT: %3d (%3d)' % (root_est, root))
            loss += functions.softmax_cross_entropy(root_scores,
                                                    XP.iarray([root]))

            if (i + 1) % 200 == 0:
                loss.backward()
                opt.update()
                parser.zerograds()
                loss = XP.fzeros(())

        loss.backward()
        opt.update()
        trace('epoch %3d: trained.                        ' % (epoch + 1))

        parent_num = 0
        parent_match = 0
        root_num = 0
        root_match = 0
        for i, data in enumerate(dev_dataset):
            trace('epoch %3d: dev sample %6d:' % (epoch + 1, i + 1),
                  rollback=True)
            parent_scores, root_scores = parser.forward(data)
            if len(data) > 1:
                parent_scores = functions.split_axis(parent_scores, len(data),
                                                     0)
            else:
                parent_scores = (parent_scores, )

            root = -1
            for j, (p_scores, (wid,
                               parent)) in enumerate(zip(parent_scores, data)):
                if parent == -1:
                    root = j
                else:
                    parent_est = p_scores.data.argmax()
                    parent_num += 1
                    parent_match += 1 if parent_est == parent else 0

            root_est = root_scores.data.argmax()
            root_num += 1
            root_match += 1 if root_est == root else 0

        result_str = \
          'epoch %3d: dev: parent-acc = %.4f (%5d/%5d), root-acc = %.4f (%4d/%4d)' % \
          ( \
            epoch + 1, \
            parent_match / parent_num, parent_match, parent_num, \
            root_match / root_num, root_match, root_num)
        trace(result_str)

        with open(args.model + '.log', 'a') as fp:
            print(result_str, file=fp)

        trace('epoch %3d: saving models ...' % (epoch + 1))
        prefix = args.model + '.%03d' % (epoch + 1)
        vocab.save(prefix + '.vocab')
        parser.save_spec(prefix + '.parent_spec')
        serializers.save_hdf5(prefix + '.parent_weights', parser)

    trace('finished.')
Exemplo n.º 46
0
 def forward(self, x, t):
     return F.softmax_cross_entropy(self.out(x), t)
Exemplo n.º 47
0
def main():
	# load MNIST images
	images, labels = dataset.load_train_images()

	# config
	config = aae.config

	# settings
	max_epoch = 1000
	num_trains_per_epoch = 5000
	batchsize = 100
	alpha = 1

	# seed
	np.random.seed(args.seed)
	if args.gpu_device != -1:
		cuda.cupy.random.seed(args.seed)

	# classification
	# 0 -> true sample
	# 1 -> generated sample
	class_true = aae.to_variable(np.zeros(batchsize, dtype=np.int32))
	class_fake = aae.to_variable(np.ones(batchsize, dtype=np.int32))

	# training
	progress = Progress()
	for epoch in xrange(1, max_epoch):
		progress.start_epoch(epoch, max_epoch)
		sum_loss_reconstruction = 0
		sum_loss_discriminator = 0
		sum_loss_generator = 0

		for t in xrange(num_trains_per_epoch):
			# sample from data distribution
			images_u = dataset.sample_unlabeled_data(images, batchsize)

			# reconstruction phase
			qy_x_u, z_u = aae.encode_x_yz(images_u, apply_softmax=True)
			reconstruction_u = aae.decode_yz_x(qy_x_u, z_u)
			loss_reconstruction = F.mean_squared_error(aae.to_variable(images_u), reconstruction_u)
			aae.backprop_generator(loss_reconstruction)
			aae.backprop_decoder(loss_reconstruction)

			# adversarial phase
			y_fake_u, z_fake_u = aae.encode_x_yz(images_u, apply_softmax=True)
			z_true_u = sampler.gaussian(batchsize, config.ndim_z, mean=0, var=1)
			y_true_u = sampler.onehot_categorical(batchsize, config.ndim_y)
			discrimination_z_true = aae.discriminate_z(z_true_u, apply_softmax=False)
			discrimination_y_true = aae.discriminate_y(y_true_u, apply_softmax=False)
			discrimination_z_fake = aae.discriminate_z(z_fake_u, apply_softmax=False)
			discrimination_y_fake = aae.discriminate_y(y_fake_u, apply_softmax=False)
			loss_discriminator_z = F.softmax_cross_entropy(discrimination_z_true, class_true) + F.softmax_cross_entropy(discrimination_z_fake, class_fake)
			loss_discriminator_y = F.softmax_cross_entropy(discrimination_y_true, class_true) + F.softmax_cross_entropy(discrimination_y_fake, class_fake)
			loss_discriminator = loss_discriminator_z + loss_discriminator_y
			aae.backprop_discriminator(loss_discriminator)

			# adversarial phase
			y_fake_u, z_fake_u = aae.encode_x_yz(images_u, apply_softmax=True)
			discrimination_z_fake = aae.discriminate_z(z_fake_u, apply_softmax=False)
			discrimination_y_fake = aae.discriminate_y(y_fake_u, apply_softmax=False)
			loss_generator_z = F.softmax_cross_entropy(discrimination_z_fake, class_true)
			loss_generator_y = F.softmax_cross_entropy(discrimination_y_fake, class_true)
			loss_generator = loss_generator_z + loss_generator_y
			aae.backprop_generator(loss_generator)

			sum_loss_reconstruction += float(loss_reconstruction.data)
			sum_loss_discriminator += float(loss_discriminator.data)
			sum_loss_generator += float(loss_generator.data)

			if t % 10 == 0:
				progress.show(t, num_trains_per_epoch, {})

		aae.save(args.model_dir)
		
		progress.show(num_trains_per_epoch, num_trains_per_epoch, {
			"loss_r": sum_loss_reconstruction / num_trains_per_epoch,
			"loss_d": sum_loss_discriminator / num_trains_per_epoch,
			"loss_g": sum_loss_generator / num_trains_per_epoch,
		})
Exemplo n.º 48
0
 def __call__(self, x, t):
     y = self.predictor(x)
     loss = F.softmax_cross_entropy(y, t)
     return loss
    use_dropout = 0.25
    label_size = 3
    knowledge_size = 2
    input_hidden = 3
    kelic_hidden = 5
    enrich_hidden = 5
    mlp_hidden = 3
    input_layers = 1
    enrich_layer = 1

    model = kim(word_embed, emb_dim, label_size, knowledge_size, input_hidden,
                kelic_hidden, enrich_hidden, mlp_hidden, input_layers,
                enrich_layer, use_dropout)
    optimizer = optimizers.AdaGrad()
    optimizer.use_cleargrads()
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(0.0001))

    if args.gpu >= 0:
        model.to_gpu()
        word_embed.to_gpu()

    for i in range(1, args.epoch + 1):
        system_start, system_end = model(x_list, y_list, x_mask, y_mask,
                                         knowledge)
        loss = F.softmax_cross_entropy(system_start, gold)
        print(F.argmax(system_start, axis=1), "loss:", loss.data)
        model.cleargrads()
        loss.backward()
        optimizer.update()
Exemplo n.º 50
0
 def __call__(self, x, t):
     return F.softmax_cross_entropy(self.out(x), t)
Exemplo n.º 51
0
def train():
    # model
    model = Mynet(train=True)

    if GPU >= 0:
        chainer.cuda.get_device(GPU).use()
        model.to_gpu()

    opt = chainer.optimizers.MomentumSGD(0.01, momentum=0.9)
    opt.setup(model)
    #opt.add_hook(chainer.optimizer.WeightDecay(0.0005))

    xs, ts, paths = data_load('../Dataset/train/images/', hf=True, vf=True)

    # training
    mb = 4
    mbi = 0
    train_ind = np.arange(len(xs))
    np.random.seed(0)
    np.random.shuffle(train_ind)

    for i in range(500):
        if mbi + mb > len(xs):
            mb_ind = train_ind[mbi:]
            np.random.shuffle(train_ind)
            mb_ind = np.hstack((mb_ind, train_ind[:(mb - (len(xs) - mbi))]))
            mbi = mb - (len(xs) - mbi)
        else:
            mb_ind = train_ind[mbi:mbi + mb]
            mbi += mb

        x = xs[mb_ind]
        t = ts[mb_ind]

        if GPU >= 0:
            x = chainer.cuda.to_gpu(x)
            t = chainer.cuda.to_gpu(t)
        #else:
        #    x = chainer.Variable(x)
        #    t = chainer.Variable(t)

        y = model(x)

        #accu = F.accuracy(y, t[..., 0])
        y = F.transpose(y, axes=(0, 2, 3, 1))
        y = F.reshape(y, [-1, num_classes + 1])
        t = F.reshape(t, [-1])
        loss = F.softmax_cross_entropy(y, t)
        accu = F.accuracy(y, t)

        model.cleargrads()
        loss.backward()
        opt.update()

        loss = loss.data
        accu = accu.data
        if GPU >= 0:
            loss = chainer.cuda.to_cpu(loss)
            accu = chainer.cuda.to_cpu(accu)

        print("iter >>", i + 1, ',loss >>', loss.item(), ',accuracy >>', accu)

    chainer.serializers.save_npz('cnn.npz', model)
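The training loop above transposes the (N, C, H, W) score map and flattens it to (N*H*W, C) before computing the per-pixel loss. F.softmax_cross_entropy also accepts multi-dimensional inputs directly, so an equivalent hedged sketch (shapes are illustrative) is:

import numpy as np
import chainer.functions as F

n, c, h, w = 2, 4, 8, 8
y = np.random.randn(n, c, h, w).astype(np.float32)            # per-pixel class scores
t = np.random.randint(0, c, size=(n, h, w)).astype(np.int32)  # per-pixel labels

loss = F.softmax_cross_entropy(y, t)  # averaged over all pixels, like the reshape-based version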
        dis.zerograds()
        styleparam_g = Variable(chainer.cuda.to_gpu(
            styleg[batch:batch + batchsize]))
        styleparam = Variable(chainer.cuda.to_gpu(
            style[batch:batch + batchsize]))
        style_vector = gen(styleparam_g)
        dis_out1 = dis(style_vector)  # fake (generated)
        dis_out2 = dis(styleparam)  # real
        # if debug:
            # print('params:')
            # print(style_vector.data[2][0:10])
            # print(styleparam.data[2][0:10])
        # print(style_vector.data.shape,styleparam.data.shape)

        #    da = chainer.cuda.to_gpu(data[1])
        loss_gen = F.softmax_cross_entropy(dis_out1, Variable(xp.zeros(batchsize, dtype=np.int32))) # adversarial loss on the generated parameters: push them toward label 0 (real)
        loss_dis = F.softmax_cross_entropy(dis_out1, Variable(xp.ones(batchsize, dtype=np.int32))) # push generated samples toward label 1 (fake)
        loss_dis += F.softmax_cross_entropy(dis_out2, Variable(xp.zeros(batchsize, dtype=np.int32))) # real samples into the discriminator: push them toward label 0 (real)
        # data[1] = chainer.cuda.to_cpu(data[1])
        Lsum_gen += loss_gen
        Lsum_dis += loss_dis
        if batch < style.shape[0] * 0.9:
            loss_gen.backward()
            Optimizer_gen.update()
            loss_dis.backward()
            Optimizer_dis.update()
        # else:
            # print("val loss: gen:%s  dis:%s" % (loss_gen.data, loss_dis.data))
        batch += batchsize
        # if(batch % 1000 == 0):
        #    print('batch loss: ' + str(loss.data))
Exemplo n.º 53
0
    def __call__(self, hs, ys):
        '''Decoder forward

        :param list hs: list of encoder hidden-state sequences, one per utterance
        :param list ys: list of target token-id sequences, one per utterance
        :return: attention decoder loss and accuracy
        '''
        self.loss = None
        # prepare input and output word sequences with sos/eos IDs
        eos = self.xp.array([self.eos], 'i')
        sos = self.xp.array([self.sos], 'i')
        ys_in = [F.concat([sos, y], axis=0) for y in ys]
        ys_out = [F.concat([y, eos], axis=0) for y in ys]

        # padding for ys with -1
        # pys: utt x olen
        pad_ys_in = F.pad_sequence(ys_in, padding=self.eos)
        pad_ys_out = F.pad_sequence(ys_out, padding=-1)

        # get dim, length info
        batch = pad_ys_out.shape[0]
        olength = pad_ys_out.shape[1]
        logging.info(self.__class__.__name__ + ' input lengths:  ' +
                     str(self.xp.array([h.shape[0] for h in hs])))
        logging.info(self.__class__.__name__ + ' output lengths: ' +
                     str(self.xp.array([y.shape[0] for y in ys_out])))

        # initialization
        c_list = [None]  # list of cell state of each layer
        z_list = [None]  # list of hidden state of each layer
        for l in six.moves.range(1, self.dlayers):
            c_list.append(None)
            z_list.append(None)
        att_w = None
        z_all = []
        self.att.reset()  # reset pre-computation of h

        # pre-computation of embedding
        eys = self.embed(pad_ys_in)  # utt x olen x zdim
        eys = F.separate(eys, axis=1)

        # loop for an output sequence
        for i in six.moves.range(olength):
            att_c, att_w = self.att(hs, z_list[0], att_w)
            if i > 0 and random.random() < self.sampling_probability:
                logging.info(' scheduled sampling ')
                z_out = self.output(z_all[-1])
                z_out = F.argmax(F.log_softmax(z_out), axis=1)
                z_out = self.embed(z_out)
                ey = F.hstack((z_out, att_c))  # utt x (zdim + hdim)
            else:
                ey = F.hstack((eys[i], att_c))  # utt x (zdim + hdim)
            c_list[0], z_list[0] = self.lstm0(c_list[0], z_list[0], ey)
            for l in six.moves.range(1, self.dlayers):
                c_list[l], z_list[l] = self['lstm%d' % l](c_list[l], z_list[l],
                                                          z_list[l - 1])
            z_all.append(z_list[-1])

        z_all = F.reshape(F.stack(z_all, axis=1),
                          (batch * olength, self.dunits))
        # compute loss
        y_all = self.output(z_all)
        self.loss = F.softmax_cross_entropy(y_all, F.flatten(pad_ys_out))
        # -1: eos, which is removed in the loss computation
        self.loss *= (np.mean([len(x) for x in ys_in]) - 1)
        acc = F.accuracy(y_all, F.flatten(pad_ys_out), ignore_label=-1)
        logging.info('att loss:' + str(self.loss.data))

        # show predicted character sequence for debug
        if self.verbose > 0 and self.char_list is not None:
            y_hat = F.reshape(y_all, (batch, olength, -1))
            y_true = pad_ys_out
            for (i, y_hat_), y_true_ in zip(enumerate(y_hat.data),
                                            y_true.data):
                if i == MAX_DECODER_OUTPUT:
                    break
                idx_hat = self.xp.argmax(y_hat_[y_true_ != -1], axis=1)
                idx_true = y_true_[y_true_ != -1]
                seq_hat = [self.char_list[int(idx)] for idx in idx_hat]
                seq_true = [self.char_list[int(idx)] for idx in idx_true]
                seq_hat = "".join(seq_hat).replace('<space>', ' ')
                seq_true = "".join(seq_true).replace('<space>', ' ')
                logging.info("groundtruth[%d]: " % i + seq_true)
                logging.info("prediction [%d]: " % i + seq_hat)

        if self.labeldist is not None:
            if self.vlabeldist is None:
                self.vlabeldist = chainer.Variable(
                    self.xp.asarray(self.labeldist))
            loss_reg = -F.sum(
                F.scale(F.log_softmax(y_all), self.vlabeldist,
                        axis=1)) / len(ys_in)
            self.loss = (
                1. - self.lsm_weight) * self.loss + self.lsm_weight * loss_reg

        return self.loss, acc
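The tail of the decoder loss above blends the token-level cross-entropy with a reference label distribution, i.e. label smoothing. A standalone hedged sketch of the same blend, with placeholder values for lsm_weight and labeldist and per-token normalisation for simplicity:

import numpy as np
import chainer.functions as F

n, v = 6, 10  # number of output tokens, vocabulary size
logits = np.random.randn(n, v).astype(np.float32)
targets = np.random.randint(0, v, size=(n,)).astype(np.int32)
labeldist = np.full((v,), 1.0 / v, dtype=np.float32)  # e.g. a uniform prior
lsm_weight = 0.1

ce = F.softmax_cross_entropy(logits, targets)
reg = -F.sum(F.scale(F.log_softmax(logits), labeldist, axis=1)) / n
loss = (1.0 - lsm_weight) * ce + lsm_weight * reg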
Exemplo n.º 54
0
    def _check_forward(self, mb_locs, mb_confs, gt_mb_locs, gt_mb_labels, k):
        if self.variable:
            mb_locs = chainer.Variable(mb_locs)
            mb_confs = chainer.Variable(mb_confs)
            gt_mb_locs = chainer.Variable(gt_mb_locs)
            gt_mb_labels = chainer.Variable(gt_mb_labels)

        loc_loss, conf_loss = multibox_loss(mb_locs, mb_confs, gt_mb_locs,
                                            gt_mb_labels, k)

        self.assertIsInstance(loc_loss, chainer.Variable)
        self.assertEqual(loc_loss.shape, ())
        self.assertEqual(loc_loss.dtype, mb_locs.dtype)

        self.assertIsInstance(conf_loss, chainer.Variable)
        self.assertEqual(conf_loss.shape, ())
        self.assertEqual(conf_loss.dtype, mb_confs.dtype)

        if self.variable:
            mb_locs = mb_locs.array
            mb_confs = mb_confs.array
            gt_mb_locs = gt_mb_locs.array
            gt_mb_labels = gt_mb_labels.array

        mb_locs = cuda.to_cpu(mb_locs)
        mb_confs = cuda.to_cpu(mb_confs)
        gt_mb_locs = cuda.to_cpu(gt_mb_locs)
        gt_mb_labels = cuda.to_cpu(gt_mb_labels)
        loc_loss = cuda.to_cpu(loc_loss.array)
        conf_loss = cuda.to_cpu(conf_loss.array)

        n_positive_total = 0
        expect_loc_loss = 0
        expect_conf_loss = 0
        for i in six.moves.xrange(gt_mb_labels.shape[0]):
            n_positive = 0
            negatives = []
            for j in six.moves.xrange(gt_mb_labels.shape[1]):
                loc = F.huber_loss(mb_locs[np.newaxis, i, j],
                                   gt_mb_locs[np.newaxis, i, j], 1).array
                conf = F.softmax_cross_entropy(mb_confs[np.newaxis, i, j],
                                               gt_mb_labels[np.newaxis, i,
                                                            j]).array

                if gt_mb_labels[i, j] > 0:
                    n_positive += 1
                    expect_loc_loss += loc
                    expect_conf_loss += conf
                else:
                    negatives.append(conf)

            n_positive_total += n_positive
            if n_positive > 0:
                expect_conf_loss += sum(sorted(negatives)[-n_positive * k:])

        if n_positive_total == 0:
            expect_loc_loss = 0
            expect_conf_loss = 0
        else:
            expect_loc_loss /= n_positive_total
            expect_conf_loss /= n_positive_total

        np.testing.assert_almost_equal(loc_loss, expect_loc_loss, decimal=2)
        np.testing.assert_almost_equal(conf_loss, expect_conf_loss, decimal=2)
Exemplo n.º 55
0
# 5. Write a training loop
import numpy as np
from chainer.dataset import concat_examples
from chainer.cuda import to_cpu, to_gpu

max_epoch = 10
gpu_id = 0

model.to_gpu()

while train_iter.epoch < max_epoch:
    train_batch = train_iter.next()
    image_train, target_train = concat_examples(train_batch, gpu_id)
    prediction_train = model(image_train)
    loss = F.softmax_cross_entropy(prediction_train, target_train)
    model.cleargrads()
    loss.backward()
    optimizer.update()

    if train_iter.is_new_epoch:
        print('epoch:{:02d} train_loss:{:.04f} '.format(train_iter.epoch, float(to_cpu(loss.data))), end='')
        test_losses = []
        test_accuracies = []
        while True:
            test_batch = test_iter.next()
            image_test, target_test = concat_examples(test_batch, gpu_id)
            prediction_test = model(image_test)
            loss_test = F.softmax_cross_entropy(prediction_test, target_test)
            test_losses.append(to_cpu(loss_test.data))
            accuracy = F.accuracy(prediction_test, target_test)
Exemplo n.º 56
0
def _elementwise_softmax_cross_entropy(x, t):
    assert x.shape[:-1] == t.shape
    shape = t.shape
    x = F.reshape(x, (-1, x.shape[-1]))
    t = F.flatten(t)
    return F.reshape(F.softmax_cross_entropy(x, t, reduce='no'), shape)
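A hedged usage sketch for the helper above, with illustrative shapes (the helper itself is assumed to be in scope): x carries one score vector per position, t one integer label per position, and the returned loss keeps the position layout.

import numpy as np

# assumes _elementwise_softmax_cross_entropy from above is in scope
x = np.random.randn(2, 5, 4).astype(np.float32)            # (batch, n_boxes, n_class) scores
t = np.random.randint(0, 4, size=(2, 5)).astype(np.int32)  # (batch, n_boxes) labels

loss_map = _elementwise_softmax_cross_entropy(x, t)
print(loss_map.shape)  # (2, 5): one unreduced loss per position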
Exemplo n.º 57
0
    def update_core(self):
        # TODO: support n_Classifier <- is this needed?
        # TIPS: in experiments, n_critic = 5 gave the best results.
        gen_optimizer = self.get_optimizer('gen')
        critic_optimizer = self.get_optimizer('critic')
        clfr_optimizer = self.get_optimizer('classfier')
        xp = self.generator.xp

        for i in range(self.n_critic):
            # grab data
            batch = self.get_iterator('main').next()
            batchsize = len(batch)
            batch = self.converter(batch, self.device)
            real_data, real_label = batch
            real_label = Variable(real_label)
            real_data = Variable(real_data) / 255.

            # TODO: is a uniform prior really OK for cWGAN...?
            z = Variable(
                xp.asarray(
                    self.generator.make_input_z_with_label(
                        batchsize, real_label.data)))

            # Generator
            gen_data = self.generator(z)
            gen_data = gen_data.reshape(batchsize, 1, 28, 28)

            # Critic (Discriminator)
            critic_real = self.critic(real_data)
            critic_fake = self.critic(gen_data)

            # Classifier
            # classifier_real = self.classifier(real_data)
            # classifier_fake = self.classifier(gen_data)

            # Loss
            ## Critic Loss
            # print(critic_fake.shape, critic_real.shape, gen_data.shape, real_data.shape)
            # critic_loss = F.mean(critic_fake - critic_real)

            e = xp.random.uniform(0., 1.,
                                  (batchsize, 1, 1, 1)).astype(np.float32)
            x_hat = e * real_data + (1 - e) * gen_data  # recreate Variable

            loss_gan = F.average(critic_fake - critic_real)
            # x_hat.backward(retain_grad=True, enable_double_backprop=True)
            grad, = chainer.grad([self.critic(x_hat)], [x_hat],
                                 enable_double_backprop=True)
            grad = F.sqrt(F.batch_l2_norm_squared(grad))

            loss_grad = self.l * F.mean_absolute_error(grad,
                                                       xp.ones_like(grad.data))

            critic_loss = loss_gan + loss_grad

            self.critic.cleargrads()
            critic_loss.backward()
            critic_optimizer.update()
            chainer.report({'critic_loss': critic_loss})
            chainer.report({'loss_grad': loss_grad})
            chainer.report({'loss_gan': loss_gan})

        batch = self.get_iterator('main').next()
        batchsize = len(batch)
        batch = self.converter(batch, self.device)
        real_data, real_label = batch
        real_label = Variable(real_label)
        real_data = Variable(real_data) / 255.

        z = Variable(
            xp.asarray(
                self.generator.make_input_z_with_label(batchsize,
                                                       real_label.data)))

        # Generator
        gen_data = self.generator(z)

        # Critic (Discriminator)
        critic_fake = self.critic(gen_data)

        # Classifier
        classifier_real = self.classifier(real_data)
        classifier_fake = self.classifier(gen_data)

        ## Categorical Loss
        c_f_loss = F.softmax_cross_entropy(classifier_fake, real_label)
        c_r_loss = F.softmax_cross_entropy(classifier_real, real_label)
        c_loss = (c_r_loss + c_f_loss) / 2

        self.classifier.cleargrads()
        c_loss.backward()
        clfr_optimizer.update()
        chainer.report({'c_r_loss': c_r_loss})
        chainer.report({'c_loss': c_loss})

        #  Generator Loss
        gen_loss = F.average(-critic_fake)

        self.generator.cleargrads()
        gen_loss.backward()
        gen_optimizer.update()
        chainer.report({'gen_loss': gen_loss})

        self.classifier.cleargrads()
        c_f_loss.backward()
        gen_optimizer.update()
        chainer.report({'c_f_loss': c_f_loss})
Exemplo n.º 58
0
def train_dcgan_labeled(evol, dis, proj, epoch0=0):
    global epoch
    o_evol = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_evol.setup(evol)
    o_dis = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_dis.setup(dis)
    o_proj = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_proj.setup(proj)
    if not args.fresh_start:
        serializers.load_hdf5("%s/dcgan_model_evol.h5" % (out_model_dir), evol)
        serializers.load_hdf5("%s/dcgan_state_evol.h5" % (out_model_dir),
                              o_evol)
        serializers.load_hdf5("%s/dcgan_model_dis.h5" % (out_model_dir), dis)
        serializers.load_hdf5("%s/dcgan_state_dis.h5" % (out_model_dir), o_dis)
        serializers.load_hdf5("%s/dcgan_model_proj.h5" % (out_model_dir), proj)
        serializers.load_hdf5("%s/dcgan_state_proj.h5" % (out_model_dir),
                              o_proj)

    o_evol.add_hook(chainer.optimizer.WeightDecay(0.00001))
    o_dis.add_hook(chainer.optimizer.WeightDecay(0.00001))
    o_proj.add_hook(chainer.optimizer.WeightDecay(0.00001))

    vis_process = None
    for epoch in xrange(epoch0, n_epoch):
        for train_ctr in xrange(0, n_train, batchsize):
            print "epoch:", epoch, "train:", train_ctr,
            # discriminator
            # 0: from dataset
            # 1: from noise

            good_movie = True
            prediction_movie = n_movie * [None]
            try:
                current_movie = load_movie()
            except:
                continue

            for i in range(n_timeseries - 1):
                if current_movie[i] is None:
                    good_movie = False
                else:
                    prediction_movie[i] = current_movie[i]
            if not good_movie: continue
            for i in range(n_timeseries - 1, n_movie):
                prediction_movie[i] = evolve_image(
                    evol, proj, prediction_movie[i - n_timeseries + 1:i])

            if train_ctr % save_interval == 0:
                for answer_mode in ['predict', 'observe']:
                    for offset in [n_timeseries, 16, 32, 64, 119]:
                        if offset >= n_movie: continue
                        img_prediction = prediction_movie[offset]
                        if answer_mode == 'observe':
                            img_prediction = current_movie[offset]
                        if img_prediction is None: continue
                        imgfn = '%s/futuresun_%d_%04d_%s+%03d.png' % (
                            out_image_dir, epoch, train_ctr, answer_mode,
                            offset)
                        plt.rcParams['figure.figsize'] = (12.0, 12.0)
                        plt.close('all')
                        plt.imshow(img_prediction, vmin=0, vmax=1.4)
                        plt.suptitle(imgfn)
                        plt.savefig(imgfn)
                        subprocess.call("cp %s ~/public_html/futuresun/" %
                                        (imgfn),
                                        shell=True)

                # we don't have enough disk for history
                history_dir = 'history/'  #%d-%d'%(epoch,  train_ctr)
                subprocess.call("mkdir -p %s " % (history_dir), shell=True)
                subprocess.call("cp %s/*.h5 %s " %
                                (out_model_dir, history_dir),
                                shell=True)

                if epoch > 0 or train_ctr > 0:
                    print 'saving model...'
                    serializers.save_hdf5(
                        "%s/dcgan_model_evol.h5" % (out_model_dir), evol)
                    serializers.save_hdf5(
                        "%s/dcgan_state_evol.h5" % (out_model_dir), o_evol)
                    serializers.save_hdf5(
                        "%s/dcgan_model_dis.h5" % (out_model_dir), dis)
                    serializers.save_hdf5(
                        "%s/dcgan_state_dis.h5" % (out_model_dir), o_dis)
                    serializers.save_hdf5(
                        "%s/dcgan_model_proj.h5" % (out_model_dir), proj)
                    serializers.save_hdf5(
                        "%s/dcgan_state_proj.h5" % (out_model_dir), o_proj)
                    print '...saved.'

            movie_in = None
            movie_out = None
            movie_out_predict = None
            evol_scores = {}
            proj_scores = {}
            matsuoka_shuzo = {}
            shuzo_evoke_timestep = []
            difficulties = ['normal', 'hard']
            vis_kit = {}
            for difficulty in difficulties:
                evol_scores[difficulty] = [0.0]
                proj_scores[difficulty] = [0.0]
                matsuoka_shuzo[difficulty] = True
                vis_kit[difficulty] = None
            matsuoka_shuzo[
                'normal'] = False  # it's no good, it's no good... let's give up...
            if vis_process is not None:
                vis_process.join()
                vis_process = None

            # start main training routine.
            print
            next_shuzo_scale = 10.0 * (1 + epoch)
            next_shuzo_offset = 1 + abs(
                int(round(np.random.normal(scale=next_shuzo_scale))))
            for train_offset in range(0, n_movie - n_timeseries):
                for difficulty in difficulties:
                    movie_clip = current_movie
                    if not matsuoka_shuzo[difficulty]:
                        # why give up there...
                        continue
                    else:
                        # don't give up!
                        pass

                    if difficulty == 'normal':
                        movie_clip_in = movie_clip
                    else:
                        movie_clip_in = prediction_movie
                    maybe_dat = create_batch(train_offset, movie_clip_in,
                                             movie_clip)
                    if not maybe_dat:
                        #print "Warning: skip offset", train_offset, "because of unavailable data."
                        continue
                    data_in, data_out, data_other = maybe_dat
                    movie_in = Variable(cuda.to_gpu(data_in))
                    movie_out = Variable(cuda.to_gpu(data_out))
                    movie_other = Variable(cuda.to_gpu(data_other))

                    movie_out_predict_before = evol(movie_in)
                    movie_out_predict = proj(
                        movie_out_predict_before)  # no proj

                    vis_kit[difficulty] = (movie_in.data.get(),
                                           movie_out.data.get(),
                                           movie_out_predict_before.data.get(),
                                           movie_out_predict.data.get())

                    if args.norm == 'dcgan':
                        yl = dis(movie_in, movie_out_predict)
                        L_evol = F.softmax_cross_entropy(
                            yl, Variable(xp.zeros(batchsize, dtype=np.int32)))
                        L_dis = F.softmax_cross_entropy(
                            yl, Variable(xp.ones(batchsize, dtype=np.int32)))

                        # train discriminator
                        yl_train = dis(movie_in, movie_out)
                        L_dis += F.softmax_cross_entropy(
                            yl_train,
                            Variable(xp.zeros(batchsize, dtype=np.int32)))
                    elif args.norm == 'CA':
                        L_evol = d_norm(0, dis, movie_out,
                                        movie_out_predict_before)
                        L_proj = d_norm(0, dis, movie_out, movie_out_predict)
                        L_dis = d_norm(1, dis, movie_out,
                                       movie_out_predict_before)
                        # L_dis  += d_norm(1, dis, movie_out, movie_out_predict)
                        L_dis += d_norm(0, dis, movie_out, movie_other)
                        # L_dis  += d_norm(0, dis, movie_other, movie_out)
                    else:
                        L2norm = (movie_out - movie_out_predict)**2
                        yl = F.sum(L2norm) / L2norm.data.size
                        L_evol = yl

                    evol_scores[difficulty] += [
                        L_evol.data.get()
                    ]  # np.average(F.softmax(yl).data.get()[:,0])
                    proj_scores[difficulty] += [
                        L_proj.data.get()
                    ]  # np.average(F.softmax(yl).data.get()[:,0])

                    # stop learning on normal mode.
                    if difficulty == 'hard':
                        o_evol.zero_grads()
                        L_evol.backward()
                        o_evol.update()

                        o_dis.zero_grads()
                        L_dis.backward()
                        o_dis.update()

                        o_proj.zero_grads()
                        L_proj.backward()
                        o_proj.update()

                    movie_in.unchain_backward()
                    movie_out_predict.unchain_backward()
                    movie_out_predict_before.unchain_backward()
                    movie_other.unchain_backward()
                    L_evol.unchain_backward()
                    if args.norm == 'dcgan' or args.norm == 'CA':
                        L_dis.unchain_backward()

                    sys.stdout.write(
                        '%d %6s %s: %f -> %f, %f -> %f shuzo:%s\r' %
                        (train_offset, difficulty, args.norm,
                         np.average(evol_scores['normal']),
                         np.average(proj_scores['normal']),
                         np.average(evol_scores['hard']),
                         np.average(proj_scores['hard']),
                         str(shuzo_evoke_timestep[-10:])))
                    sys.stdout.flush()

                    # update the prediction as results of learning.
                    prediction_movie[
                        train_offset + n_timeseries - 1] = evolve_image(
                            evol, proj,
                            prediction_movie[train_offset:train_offset +
                                             n_timeseries - 1])

                    # prevent too much learning from noisy prediction.
                    # if len(evol_scores['hard'])>=10 and np.average(evol_scores['hard'][-5:-1]) > 5 * np.average(evol_scores['normal']):
                    if train_offset == next_shuzo_offset:
                        next_shuzo_offset = train_offset + 1 + abs(
                            int(round(
                                np.random.normal(scale=next_shuzo_scale))))
                        # never, ever give up!
                        # matsuoka_shuzo['hard'] = False
                        shuzo_evoke_timestep += [train_offset]
                        evol_scores['hard'] = [0.0]
                        proj_scores['hard'] = [0.0]
                        for t in range(train_offset,
                                       train_offset + n_timeseries):
                            if current_movie[t] is not None:
                                prediction_movie[t] = current_movie[t]

            print

            def visualize_vis_kit(vis_kit):
                print "visualizing...",
                sys.stdout.flush()
                for difficulty in difficulties:
                    if vis_kit[difficulty] is None:
                        continue
                    movie_data, movie_out_data, movie_pred_data, movie_proj_data = vis_kit[
                        difficulty]
                    imgfn = '%s/batch-%s_%d_%04d.png' % (
                        out_image_dir, difficulty, epoch, train_ctr)

                    n_col = n_timeseries + 3
                    plt.rcParams['figure.figsize'] = (1.0 * n_col,
                                                      1.0 * batchsize)
                    plt.close('all')

                    for ib in range(batchsize):
                        for j in range(n_timeseries - 1):
                            plt.subplot(batchsize, n_col, 1 + ib * n_col + j)
                            if j < 2:
                                vmin = -1
                                vmax = 1
                            else:
                                vmin = 0
                                vmax = 1.4
                            plt.imshow(movie_data[ib, j, :, :],
                                       vmin=vmin,
                                       vmax=vmax)
                            plt.axis('off')

                        plt.subplot(batchsize, n_col,
                                    1 + ib * n_col + n_timeseries - 1)
                        plt.imshow(movie_pred_data[ib, 0, :, :],
                                   vmin=0,
                                   vmax=1.4)
                        plt.axis('off')

                        plt.subplot(batchsize, n_col,
                                    1 + ib * n_col + n_timeseries)
                        plt.imshow(movie_proj_data[ib, 0, :, :],
                                   vmin=0,
                                   vmax=1.4)
                        plt.axis('off')

                        plt.subplot(batchsize, n_col,
                                    1 + ib * n_col + n_timeseries + 2)
                        plt.imshow(movie_out_data[ib, 0, :, :],
                                   vmin=0,
                                   vmax=1.4)
                        plt.axis('off')

                    plt.suptitle(imgfn)
                    plt.savefig(imgfn)
                    subprocess.call(
                        "cp %s ~/public_html/suntomorrow-batch-%s-%s.png" %
                        (imgfn, difficulty, args.gpu),
                        shell=True)
                print "visualized.",
                sys.stdout.flush()

            vis_process = Process(target=visualize_vis_kit, args=(vis_kit, ))
            vis_process.start()
Exemplo n.º 59
0
model = MLP(10, 10, 2)

optimizer = optimizers.SGD()

optimizer.setup(model)

train_data_variable = Variable(train_data.astype(np.float32))
train_label_variable = Variable(train_label.astype(np.int32))

loss_log = []
for epoch in range(200):
    model.cleargrads()

    prod_label = model(train_data_variable)
    loss = F.softmax_cross_entropy(prod_label, train_label_variable)
    loss.backward()
    optimizer.update()
    loss_log.append(loss.data)

#print(loss_log)

print(test_data[0:10])
print("-----")
test_data_variable = Variable(test_data.astype(np.float32))
y = model(test_data_variable)

y = F.softmax(y)
print(y.data[0:50])
pred_label = np.argmax(y.data, 1)
print(pred_label[0:50])
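One small point worth noting about the snippet above: F.softmax_cross_entropy applies softmax internally, so the model's raw outputs go straight into the loss during training; the explicit F.softmax at test time only converts them to probabilities, and taking argmax over the raw outputs would give the same predicted labels. A hedged standalone sketch:

import numpy as np
import chainer.functions as F

logits = np.random.randn(5, 3).astype(np.float32)
pred_from_logits = np.argmax(logits, axis=1)
pred_from_probs = np.argmax(F.softmax(logits).array, axis=1)
assert (pred_from_logits == pred_from_probs).all()  # softmax is monotone per row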
Exemplo n.º 60
0
 def __call__(self, x, t):
     h = self.predict(x)
     loss = F.softmax_cross_entropy(h, t)
     chainer.report({'loss': loss, 'accuracy': F.accuracy(h, t)}, self)
     return loss