示例#1
0
def main():
    """Train ``Net`` on MNIST and print the loss and test accuracy per epoch.

    Command-line options select the mini-batch size, the number of epochs
    and the GPU ID (a negative ID means the CPU is used).
    """
    # get args
    parser = argparse.ArgumentParser(description='ConvolutionNN')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    # print config
    print("#GPU       : {}".format(args.gpu))
    print("#batchsize : {}".format(args.batchsize))
    print("#epoch     : {}".format(args.epoch))

    # Model
    model = Net(50, 10)
    # setting for using GPU
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    # Optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Data
    train, test = chainer.datasets.get_mnist()
    # convert to 3 dim tensor (channel, height, width)
    X_train = [(t[0].reshape(1, 28, 28), t[1]) for t in train]
    X_test = [(t[0].reshape(1, 28, 28), t[1]) for t in test]
    train_iter = chainer.iterators.SerialIterator(X_train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(
        X_test, args.batchsize, repeat=False, shuffle=False)

    # print header
    print("Epoch\tloss(train)\taccuracy(test)")

    # train
    while train_iter.epoch < args.epoch:

        # next batch data; send it to the same device as the model
        # (a negative device ID keeps the arrays on the CPU)
        train_batch = train_iter.next()
        (x, t) = convert.concat_examples(train_batch, args.gpu)

        # calculate loss and optimize params
        model.cleargrads()
        loss = model.forward(x, t)
        loss.backward()
        optimizer.update()

        # log every epoch
        if train_iter.is_new_epoch:

            # calculate accuracy on the whole test set in a single pass;
            # the forward call is made only to refresh model.accuracy
            (x, t) = convert.concat_examples(test_iter.dataset, args.gpu)
            model.forward(x, t)

            # float() makes the 0-dim arrays printable with %f on any device
            print("%d\t%f\t%f" % (train_iter.epoch,
                                  float(loss.data),
                                  float(model.accuracy.data)))
示例#2
0
def main():
    """Train an ``AutoEncoder`` on MNIST images and plot one sample per epoch.

    Each training example uses the image as both input and target. After
    every epoch the loss over the whole training set is printed and two
    plots of a fixed test sample are written into ``result/``.
    """
    # fixed run configuration
    n_epochs = 20
    minibatch_size = 100
    sample_index = 22

    # output directory for the plots
    os.makedirs('result', exist_ok=True)

    # model and optimizer
    model = AutoEncoder(784, 64, 784)
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # dataset of (image, image) pairs; the labels are dropped
    train, test = chainer.datasets.get_mnist()
    images = [example[0] for example in train]
    pairs = tuple_dataset.TupleDataset(images, images)
    train_iter = chainer.iterators.SerialIterator(pairs, minibatch_size)

    # print header
    print("Epoch\tloss(train)")

    while train_iter.epoch < n_epochs:
        inputs, targets = convert.concat_examples(train_iter.next())

        # one optimisation step
        model.cleargrads()
        batch_loss = model.loss(inputs, targets)
        batch_loss.backward()
        optimizer.update()

        # everything below runs once per finished epoch
        if not train_iter.is_new_epoch:
            continue

        # loss over the whole training set
        inputs, targets = convert.concat_examples(train_iter.dataset)
        epoch_loss = model.loss(inputs, targets)
        print("%d\t%f" % (train_iter.epoch, epoch_loss.data))

        # plot the prediction for the fixed test sample
        sample, label = test[sample_index]
        predicted = model.predictor(np.array([sample])).data
        plot_mnist_data(
            predicted, label,
            'result/epoch_{}.png'.format(train_iter.epoch))

        # plot the hidden node values (second argument True selects them,
        # per the original comment -- confirm against AutoEncoder.__call__)
        hidden = model(np.array([sample]), True).data
        plot_mnist_data(
            hidden, label,
            'result/epoch_{}_hidden.png'.format(train_iter.epoch), (8, 8))
示例#3
0
    def extract(self, images, layers=None, size=(224, 224),
                test=True, volatile=flag.OFF):
        """Extracts all the feature maps of given images.

        The difference of directly executing ``__call__`` is that
        it directly accepts images as an input and automatically
        transforms them to a proper variable. That is,
        it is also interpreted as a shortcut method that implicitly calls
        ``prepare`` and ``__call__`` functions.

        Args:
            images (iterable of PIL.Image or numpy.ndarray): Input images.
            layers (list of str): The list of layer names you want to extract.
                ``None`` (the default) is treated as ``['fc7']``.
            size (pair of ints): The resolution of resized images used as
                an input of CNN. All the given images are not resized
                if this argument is ``None``, but the resolutions of
                all the images should be the same.
            test (bool): If ``True``, dropout runs in test mode.
            volatile (~chainer.Flag): Volatility flag used for input variables.

        Returns:
            Dictionary of ~chainer.Variable: A dictionary in which
            the key contains the layer name and the value contains
            the corresponding feature map variable.

        """
        # A list literal as a default would be shared between calls, so the
        # default is materialised here instead.
        if layers is None:
            layers = ['fc7']

        x = concat_examples([prepare(img, size=size) for img in images])
        x = Variable(self.xp.asarray(x), volatile=volatile)
        return self(x, layers=layers, test=test)
示例#4
0
文件: vgg.py 项目: delta2323/chainer
    def predict(self, images, oversample=True):
        """Computes all the probabilities of given images.

        Every image is prepared at 256x256. With ``oversample`` the batch is
        expanded by ``imgproc.oversample`` into 224x224 crops and the
        per-crop outputs are averaged in groups of ten; otherwise only the
        central 224x224 window is used.

        Args:
            images (iterable of PIL.Image or numpy.ndarray): Input images.
            oversample (bool): If ``True``, it averages results across
                center, corners, and mirrors. Otherwise, it uses only the
                center.

        Returns:
            ~chainer.Variable: Output that contains the class probabilities
            of given images.

        """
        batch = concat_examples(
            [prepare(image, size=(256, 256)) for image in images])
        if oversample:
            batch = imgproc.oversample(batch, crop_dims=(224, 224))
        else:
            # central 224x224 window of the 256x256 input
            batch = batch[:, :, 16:240, 16:240]

        # Use no_backprop_mode to reduce memory consumption
        with function.no_backprop_mode():
            inputs = Variable(self.xp.asarray(batch))
            probs = self(inputs, layers=['prob'])['prob']
            if oversample:
                # regroup the rows into (image, 10 crops, ...) and average
                n_images = probs.data.shape[0] // 10
                tail_shape = probs.data.shape[1:]
                grouped = reshape(probs, (n_images, 10) + tail_shape)
                probs = sum(grouped, axis=1) / 10
        return probs
def seq2seq_pad_concat_convert(xy_batch, device, eos_id=0):
    """Pad and stack a minibatch of (source, target) token-id sequences.

    Args:
        xy_batch (list of tuple of two numpy.ndarray-s or cupy.ndarray-s):
            xy_batch[i][0] is an array
            of token ids of i-th input sentence in a minibatch.
            xy_batch[i][1] is an array
            of token ids of i-th target sentence in a minibatch.
            The shape of each array is `(sentence length, )`.
        device (int or None): Device ID to which an array is sent. If it is
            negative value, an array is sent to CPU. If it is positive, an
            array is sent to GPU with the given ID. If it is ``None``, an
            array is left in the original device.
        eos_id (int): Token id written right after the last token of every
            source and target-output row, and used as the leading column of
            the target-input block.

    Returns:
        Tuple of Converted array.
            (input_sent_batch_array, target_sent_batch_input_array,
            target_sent_batch_output_array).
            Each array is one column wider than the longest sentence of its
            side. All sentences are padded with -1.
    """
    sources, targets = zip(*xy_batch)

    src_block = convert.concat_examples(sources, device, padding=-1)
    tgt_block = convert.concat_examples(targets, device, padding=-1)
    xp = cuda.get_array_module(src_block)

    def with_eos(block, seqs):
        # Append one padding column, then overwrite the slot right after
        # each sentence with the end-of-sequence id.
        widened = xp.pad(block, ((0, 0), (0, 1)), 'constant',
                         constant_values=-1)
        for row, seq in enumerate(seqs):
            widened[row, len(seq)] = eos_id
        return widened

    src_out = with_eos(src_block, sources)
    tgt_out = with_eos(tgt_block, targets)

    # The target-input block is the target shifted right by one column
    # filled with eos_id.
    tgt_in = xp.pad(tgt_block, ((0, 0), (1, 0)), 'constant',
                    constant_values=eos_id)
    return (src_out, tgt_in, tgt_out)
示例#6
0
 def __fit(self, train_data, valid_data, test_data, callback):
     """Train ``self.net`` for ``self.epoch_num`` epochs.

     After every epoch the mean training loss/accuracy is computed, the
     optional validation and test sets are evaluated, and ``callback`` (if
     given) is invoked with the results.

     Args:
         train_data: Dataset iterated in shuffled mini-batches.
         valid_data: Dataset passed to ``self.__evaluate`` each epoch, or
             ``None`` to skip validation.
         test_data: Dataset passed to ``self.__evaluate`` each epoch, or
             ``None`` to skip testing.
         callback: Callable receiving ``(epoch, net, optimizer, train_loss,
             train_acc, valid_loss, valid_acc, test_loss, test_acc,
             test_time)``, or ``None``.
     """
     batch_size = self.batch_size
     train_iterator = chainer.iterators.SerialIterator(train_data, batch_size, repeat=True, shuffle=True)
     # time.clock() was removed in Python 3.8; use perf_counter when it
     # exists and fall back to clock only on interpreters that lack it.
     timer = getattr(time, 'perf_counter', None) or time.clock
     train_loss = 0
     train_acc = 0
     num = 0
     iteration = 0
     # Total number of updates for the cosine schedule; clamped to 1 so the
     # schedule never divides by zero on tiny datasets.
     iteration_num = max(1, len(train_data) * self.epoch_num // batch_size)
     while train_iterator.epoch < self.epoch_num:
         if self.lr_shape == 'cosine':
             lr = 0.5 * self.initial_lr * (1 + math.cos(math.pi * iteration / iteration_num))
             # optimizers exposing ``alpha`` get it set; others get ``lr``
             if hasattr(self.optimizer, 'alpha'):
                 self.optimizer.alpha = lr
             else:
                 self.optimizer.lr = lr
         batch = train_iterator.next()
         x_batch, y_batch = convert.concat_examples(batch, self.device_id)
         loss, acc = self.__forward(x_batch, y_batch)
         self.net.cleargrads()
         loss.backward()
         self.optimizer.update()
         # accumulate sums weighted by batch length so the epoch mean is exact
         train_loss += float(loss.data) * len(x_batch)
         train_acc += float(acc.data) * len(x_batch)
         num += len(x_batch)
         iteration += 1
         if not train_iterator.is_new_epoch:
             continue
         # ---- end of epoch: report and adjust the learning rate ----
         train_loss /= num
         train_acc /= num
         valid_loss = None
         valid_acc = None
         if valid_data is not None:
             valid_loss, valid_acc = self.__evaluate(valid_data)
         test_loss = None
         test_acc = None
         test_time = 0
         if test_data is not None:
             start_clock = timer()
             test_loss, test_acc = self.__evaluate(test_data)
             test_time = timer() - start_clock
         epoch = train_iterator.epoch
         if callback is not None:
             callback(epoch, self.net, self.optimizer, train_loss, train_acc, valid_loss, valid_acc, test_loss, test_acc, test_time)
         train_loss = 0
         train_acc = 0
         num = 0
         if self.lr_shape == 'multistep':
             lr_decay = self.lr_decay
             # A single positive entry is treated as a period; otherwise the
             # entries are the epochs at which to decay.
             if len(lr_decay) == 1 and lr_decay[0] > 0 and epoch % lr_decay[0] == 0 or epoch in lr_decay:
                 if hasattr(self.optimizer, 'alpha'):
                     self.optimizer.alpha *= 0.1
                 else:
                     self.optimizer.lr *= 0.1
     train_iterator.finalize()
示例#7
0
文件: resnet.py 项目: zghzdxs/chainer
    def extract(self, images, layers=None, size=(224, 224), **kwargs):
        """extract(self, images, layers=['pool5'], size=(224, 224))

        Extracts all the feature maps of given images.

        Compared with calling ``forward`` directly, this method accepts raw
        images and turns them into a proper variable itself, i.e. it is a
        shortcut that implicitly calls ``prepare`` and then ``forward``.

        Unlike ``predict`` method, this method does not override
        ``chainer.config.train`` and ``chainer.config.enable_backprop``
        configuration. If you want to extract features without updating
        model parameters, you need to manually set configuration when
        calling this method as follows:

         .. code-block:: python

             # model is an instance of ResNetLayers (50 or 101 or 152 layers)
             with chainer.using_config('train', False):
                 with chainer.using_config('enable_backprop', False):
                     feature = model.extract([image])

        Args:
            images (iterable of PIL.Image or numpy.ndarray): Input images.
            layers (list of str): The list of layer names you want to extract.
                ``None`` selects ``['pool5']``.
            size (pair of ints): The resolution of resized images used as
                an input of CNN. All the given images are not resized
                if this argument is ``None``, but the resolutions of
                all the images should be the same.

        Returns:
            Dictionary of ~chainer.Variable: A dictionary in which
            the key contains the layer name and the value contains
            the corresponding feature map variable.

        """
        # Reject the removed ``test`` / ``volatile`` keywords with a
        # dedicated message, and any other stray keyword generically.
        if kwargs:
            argument.check_unexpected_kwargs(
                kwargs,
                test='test argument is not supported anymore. '
                'Use chainer.using_config',
                volatile='volatile argument is not supported anymore. '
                'Use chainer.using_config')
            argument.assert_kwargs_empty(kwargs)

        selected = ['pool5'] if layers is None else layers

        prepared = [prepare(image, size=size) for image in images]
        batch = Variable(self.xp.asarray(concat_examples(prepared)))
        return self(batch, layers=selected)
示例#8
0
def train(args):
    """Build the tagger, its datasets and a Chainer ``Trainer``, then run it.

    ``args`` supplies the model directory (also used as the trainer output
    directory), embedding sizes, optional initial weights and pretrained
    embeddings, the train/validation data locations, the batch size and the
    number of epochs.
    """
    model = JaCCGEmbeddingTagger(
        args.model, args.word_emb_size, args.char_emb_size)
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    if args.pretrained:
        print('Load pretrained word embeddings from', args.pretrained)
        model.load_pretrained_embeddings(args.pretrained)

    # datasets and iterators
    train_data = JaCCGTaggerDataset(args.model, args.train)
    train_iter = chainer.iterators.SerialIterator(train_data, args.batchsize)
    val_data = JaCCGTaggerDataset(args.model, args.val)
    val_iter = chainer.iterators.SerialIterator(
        val_data, args.batchsize, repeat=False, shuffle=False)

    optimizer = chainer.optimizers.AdaGrad()
    optimizer.setup(model)
    # optimizer.add_hook(WeightDecay(1e-8))

    def my_converter(x, dev):
        # Only the second field of each example is padded (with -1).
        return convert.concat_examples(x, dev, (None, -1, None, None))

    updater = training.StandardUpdater(
        train_iter, optimizer, converter=my_converter)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.model)

    val_interval = (1000, 'iteration')
    log_interval = (200, 'iteration')

    # separate copy of the model with its ``train`` flag switched off
    eval_model = model.copy()
    eval_model.train = False

    report_keys = [
        'epoch',
        'iteration',
        'main/loss',
        'validation/main/loss',
        'main/accuracy',
        'validation/main/accuracy',
    ]

    trainer.extend(
        extensions.Evaluator(val_iter, eval_model, my_converter),
        trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=val_interval)
    trainer.extend(
        extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'),
        trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport(report_keys), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
示例#9
0
文件: googlenet.py 项目: hvy/chainer
    def extract(self, images, layers=None, size=(224, 224), **kwargs):
        """extract(self, images, layers=['pool5'], size=(224, 224))

        Extracts all the feature maps of given images.

        Compared with calling ``forward`` directly, this method accepts raw
        images and turns them into a proper variable itself, i.e. it is a
        shortcut that implicitly calls ``prepare`` and then ``forward``.

        Unlike ``predict`` method, this method does not override
        ``chainer.config.train`` and ``chainer.config.enable_backprop``
        configuration. If you want to extract features without updating
        model parameters, you need to manually set configuration when
        calling this method as follows:

         .. code-block:: python

             # model is an instance of `GoogLeNet`
             with chainer.using_config('train', False):
                 with chainer.using_config('enable_backprop', False):
                     feature = model.extract([image])

        Args:
            images (iterable of PIL.Image or numpy.ndarray): Input images.
            layers (list of str): The list of layer names you want to extract.
                ``None`` selects ``['pool5']``.
            size (pair of ints): The resolution of resized images used as
                an input of CNN. All the given images are not resized
                if this argument is ``None``, but the resolutions of
                all the images should be the same.

        Returns:
            Dictionary of ~chainer.Variable: A dictionary in which
            the key contains the layer name and the value contains
            the corresponding feature map variable.

        """
        # Reject the removed ``train`` / ``volatile`` keywords with a
        # dedicated message, and any other stray keyword generically.
        if kwargs:
            argument.check_unexpected_kwargs(
                kwargs, train='train argument is not supported anymore. '
                'Use chainer.using_config',
                volatile='volatile argument is not supported anymore. '
                'Use chainer.using_config')
            argument.assert_kwargs_empty(kwargs)

        selected = ['pool5'] if layers is None else layers

        prepared = [prepare(image, size=size) for image in images]
        batch = Variable(self.xp.asarray(concat_examples(prepared)))
        return self(batch, layers=selected)
示例#10
0
def source_pad_concat_convert(x_seqs, device, eos_id=0, bos_id=2):
    """Stack source sequences into one block framed by BOS and EOS ids.

    Sequences are padded with -1 to a common length, ``eos_id`` is written
    right after the last token of every row, and a leading column filled
    with ``bos_id`` is prepended.

    Args:
        x_seqs: Sequence of 1-D token-id arrays.
        device: Device specifier forwarded to ``convert.concat_examples``.
        eos_id (int): End-of-sequence token id.
        bos_id (int): Beginning-of-sequence token id.

    Returns:
        The padded 2-D block, two columns wider than the longest sequence.
    """
    padded = convert.concat_examples(x_seqs, device, padding=-1)
    xp = cuda.get_array_module(padded)

    # one extra trailing column so every row has room for its EOS
    with_tail = xp.pad(padded, ((0, 0), (0, 1)), 'constant',
                       constant_values=-1)
    for row, seq in enumerate(x_seqs):
        with_tail[row, len(seq)] = eos_id

    # leading BOS column
    return xp.pad(with_tail, ((0, 0), (1, 0)), 'constant',
                  constant_values=bos_id)
示例#11
0
文件: test.py 项目: jtpils/pointnet-4
 def kdnet_converter(batch, device=None, padding=None):
     """Concatenate a batch on the CPU, then move non-object arrays to ``device``.

     Arrays whose dtype is ``object`` are returned as they are; every other
     array is passed through ``to_device``.

     Returns:
         tuple: The converted arrays, in the order ``concat_examples``
         produced them.
     """
     # Always concatenate on the CPU first (device=None on purpose).
     cpu_arrays = concat_examples(batch, device=None, padding=padding)
     return tuple(
         arr if arr.dtype == object else to_device(device, arr)
         for arr in cpu_arrays)
示例#12
0
def source_pad_concat_convert(x_seqs, device, eos_id=1, bos_id=3):
    """Stack source sequences into one block framed by BOS and EOS ids.

    Sequences are padded with 0 to a common length, ``eos_id`` is written
    right after the last token of every row, and a leading column filled
    with ``bos_id`` is prepended.

    Args:
        x_seqs: Sequence of 1-D token-id arrays.
        device: Device specifier forwarded to ``convert.concat_examples``.
        eos_id (int): End-of-sequence token id.
        bos_id (int): Beginning-of-sequence token id.

    Returns:
        The padded 2-D block, two columns wider than the longest sequence.
    """
    padded = convert.concat_examples(x_seqs, device, padding=0)

    # NOTE(review): ``np.pad`` is applied to whatever ``concat_examples``
    # returned; if ``device`` selects a GPU that array is presumably not a
    # NumPy array -- confirm callers only use the CPU here.
    with_tail = np.pad(padded, ((0, 0), (0, 1)), 'constant',
                       constant_values=0)
    for row, seq in enumerate(x_seqs):
        with_tail[row, len(seq)] = eos_id

    # leading BOS column
    return np.pad(with_tail, ((0, 0), (1, 0)), 'constant',
                  constant_values=bos_id)
def run_train_loop(optimizer, train_iter, test_iter, test_count, epoch,
                   device):
    """Train ``optimizer.target`` until ``train_iter`` reaches ``epoch``.

    At the end of every epoch the mean training loss/accuracy is printed,
    then the model is evaluated over ``test_iter`` with train mode off and
    the test means (divided by ``test_count``) are printed.
    """
    model = optimizer.target

    seen = 0
    loss_total = 0
    acc_total = 0
    while train_iter.epoch < epoch:
        x_array, t_array = convert.concat_examples(train_iter.next(), device)
        x = chainer.Variable(x_array)
        t = chainer.Variable(t_array, requires_grad=False)
        optimizer.update(model, x, t)

        n = len(t)
        seen += n
        loss_total += float(model.loss.array) * n
        acc_total += float(model.accuracy.array) * n

        if not train_iter.is_new_epoch:
            continue

        print('epoch: ', train_iter.epoch)
        print('train mean loss: {}, accuracy: {}'.format(
            loss_total / seen, acc_total / seen))

        # restart the running sums for the next epoch
        seen = 0
        loss_total = 0
        acc_total = 0

        # evaluation
        test_loss_total = 0
        test_acc_total = 0
        # It is good practice to turn off train mode during evaluation.
        with configuration.using_config('train', False):
            for test_batch in test_iter:
                x_array, t_array = convert.concat_examples(test_batch, device)
                x = chainer.Variable(x_array)
                t = chainer.Variable(t_array, requires_grad=False)
                loss = model(x, t)
                test_loss_total += float(loss.array) * len(t)
                test_acc_total += float(model.accuracy.array) * len(t)

        test_iter.reset()
        print('test mean  loss: {}, accuracy: {}'.format(
            test_loss_total / test_count, test_acc_total / test_count))
示例#14
0
def main():
    """Visualise which input pixels influence the centre of ``model.feat``.

    Loads a saved model, runs one MNIST test image through it,
    back-propagates from the sum of the feature map's central position and
    scatter-plots the input pixels whose normalised absolute gradient
    exceeds a threshold on top of the image.
    """
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--resume',
                        '-r',
                        default='result/model_iter_9',
                        help='Resume the training from snapshot')
    parser.add_argument('--deformable',
                        '-d',
                        type=int,
                        default=1,
                        help='use deformable convolutions')
    args = parser.parse_args()

    if args.deformable == 1:
        model = DeformableConvnet(10)
    else:
        model = Convnet(10)
    chainer.serializers.load_npz(args.resume, model)

    train, test = chainer.datasets.get_mnist(ndim=3)
    test = TransformDataset(test, transform)

    test_iter = chainer.iterators.SerialIterator(test,
                                                 batch_size=1,
                                                 repeat=False,
                                                 shuffle=False)

    threshold = 1
    for _ in range(1):
        batch = test_iter.next()
        in_arrays = concat_examples(batch, device=None)
        in_vars = tuple(chainer.Variable(x) for x in in_arrays)
        img, label = in_vars
        model(img)
        feat = model.feat
        H, W = feat.shape[2:]
        # Floor division: array indices must be integers, and ``/`` yields
        # a float under Python 3.
        center = F.sum(feat[:, :, H // 2, W // 2])
        center.grad = np.ones_like(center.data)
        model.zerograds()
        img.zerograd()
        center.backward(retain_grad=True)

        img_grad = img.grad[0]  # (1, 28, 28)

        # scale the absolute gradient to 0..255 relative to its maximum
        img_grad_abs = (np.abs(img_grad) / np.max(np.abs(img_grad)) *
                        255)[0]  # 28, 28
        # an all-zero gradient divides 0 by 0; treat those pixels as 0
        img_grad_abs[np.isnan(img_grad_abs)] = 0
        y_indices, x_indices = np.where(img_grad_abs > threshold)
        plt.scatter(x_indices, y_indices, c='red')
        vis_img = transforms.chw_to_pil_image(255 * img.data[0])[:, :, 0]
        plt.imshow(vis_img, interpolation='nearest', cmap='gray')
        plt.show()
示例#15
0
    def predict(model, test_iter):
        """Return the softmax outputs of ``model`` over all of ``test_iter``.

        The iterator is reset first. Each batch is converted for the device
        given by ``args.gpu`` and evaluated with train mode and backprop
        disabled; the per-batch results are merged by ``concat_arrays``.
        """
        test_iter.reset()
        outputs = []

        for batch in test_iter:
            inputs = convert.concat_examples(batch, args.gpu)

            with chainer.using_config('train', False), \
                    chainer.using_config('enable_backprop', False):
                # only the first converted array is fed to the model
                scores = model(inputs[0])
                outputs.append(chainer.functions.softmax(scores).data)
        return concat_arrays(outputs)
示例#16
0
def evaluate(model, it, device):
    """Report the dataset-mean loss and accuracy of ``model`` to ``logger``.

    Every batch from ``it`` contributes its loss and ``model.accuracy``
    weighted by the batch length; the totals are divided by
    ``len(it.dataset)`` and plotted as 'test loss' and 'test accuracy'.
    """
    loss_total = 0
    accuracy_total = 0
    for batch in it:
        inputs, targets = convert.concat_examples(batch, device)
        weight = len(batch)
        loss_total += model(inputs, targets) * weight
        accuracy_total += model.accuracy * weight

    n_examples = len(it.dataset)
    logger.plot('test loss', loss_total / n_examples)
    logger.plot('test accuracy', accuracy_total / n_examples)
    def __init__(self, dataset, batch_size, repeat=True, shuffle=True):
        """Initialise the base iterator and index the dataset's labels.

        Sets ``self.labels`` to the sorted distinct values of the second
        field of ``dataset`` and ``self.label_cnt`` to an ordered mapping
        from each of those labels to its number of occurrences.
        """
        super(custom_iterator, self).__init__(
            dataset, batch_size, repeat=repeat, shuffle=shuffle)

        # second field of the concatenated dataset holds the labels
        all_labels = convert.concat_examples(dataset)[1]
        self.labels = numpy.sort(numpy.unique(all_labels))

        self.label_cnt = collections.OrderedDict(
            (label, len(numpy.where(all_labels == label)[0]))
            for label in self.labels)
示例#18
0
    def resize_converter(batch, device=None, padding=None):
        """Crop, resize, augment and batch images.

        For every image: a 4th channel is dropped, the image is
        centre-cropped to a square, resized to ``args.image_size`` with
        bilinear interpolation, scaled to [0, 1], randomly flipped along its
        last axis, passed through a randomly chosen tone curve and finally
        mapped to [-1, 1] as float32.

        Args:
            batch: Iterable of 3-D image arrays with the channel axis first.
            device: Forwarded to ``concat_examples``.
            padding: Forwarded to ``concat_examples``.

        Returns:
            The result of ``concat_examples`` on the processed images.
        """
        new_batch = []
        for image in batch:
            # NOTE(review): the names follow the original author; whether
            # axis 1 is really the width is not visible here -- confirm.
            C, W, H = image.shape

            # keep only the first three channels (4th is presumably alpha)
            if C == 4:
                image = image[:3, :, :]

            # centre-crop the longer spatial axis to a square
            if W < H:
                offset = (H - W) // 2
                image = image[:, :, offset:offset + W]
            elif W > H:
                offset = (W - H) // 2
                image = image[:, offset:offset + H, :]

            # imresize expects channel-last input
            image = image.transpose(1, 2, 0)
            image = imresize(image, (args.image_size, args.image_size),
                             interp='bilinear')
            image = image.transpose(2, 0, 1)

            image = image / 255.  # 0. ~ 1.

            # Augmentation: random flip along the last axis
            if np.random.rand() < 0.5:
                image = image[:, :, ::-1]

            # Augmentation: tone correction
            mode = np.random.randint(4)
            # mode == 0 -> no correction
            if mode == 1:
                gain = 0.2 * np.random.rand() + 0.9  # 0.9 ~ 1.1
                image = np.power(image, gain)
            elif mode == 2:
                gain = 1.5 * np.random.rand() + 1e-10  # 0 ~ 1.5
                image = np.tanh(gain * (image - 0.5))

                range_min = np.tanh(gain * (-0.5))  # @x=0.0
                range_max = np.tanh(gain * 0.5)  # @x=1.0
                image = (image - range_min) / (range_max - range_min)
            elif mode == 3:
                gain = 2.0 * np.random.rand() + 1e-10  # 0 ~ 2.0
                image = np.sinh(gain * (image - 0.5))

                # Normalise with the sinh range: the curve applied above is
                # sinh, so using the tanh bounds pushed values outside
                # [0, 1].
                range_min = np.sinh(gain * (-0.5))  # @x=0.0
                range_max = np.sinh(gain * 0.5)  # @x=1.0
                image = (image - range_min) / (range_max - range_min)

            image = 2. * image - 1.  # [0, 1] -> [-1, 1]
            new_batch.append(image.astype(np.float32))
        return concat_examples(new_batch, device=device, padding=padding)
示例#19
0
    def __call__(self, trainer):
        """Embed every batch of ``self.iterator`` and save the stacked result.

        A shallow copy of the iterator is consumed so the stored iterator is
        left as it is. The first output of ``forward`` for each batch is
        collected, the pieces are concatenated along axis 0 and written with
        ``np.save`` to ``self.filename`` inside ``trainer.out``.
        """
        pieces = []
        for batch in copy.copy(self.iterator):
            self._check_type_dataset(batch[0])
            target_device = chainer.cuda.get_device(self.target)
            arrays = convert.concat_examples(batch, device=target_device)
            # only the first array of the batch is fed to the embed function
            outputs = forward(self.target, arrays[0:1],
                              forward_func=self.embed_func)
            pieces.append(outputs[0])
        stacked = np.concatenate(pieces, axis=0)

        np.save(osp.join(trainer.out, self.filename), stacked)
示例#20
0
def concat_examples_one(batch, device=None, padding=None):
    """Concatenate a minibatch into a single torch tensor.

    :param batch: The batch to concatenate
    :param int device: CUDA device to move the tensor to; ``None`` or a
        negative value leaves it where ``torch.from_numpy`` created it
    :param padding: Padding forwarded to ``convert.concat_examples``
    :return: the concatenated batch
    :rtype: torch.Tensor
    """
    tensor = torch.from_numpy(convert.concat_examples(batch, padding=padding))
    if device is None or device < 0:
        return tensor
    return tensor.cuda(device)
示例#21
0
    def make_preview(trainer):
        """Convert one batch in both directions and save images and audio.

        One batch is taken from each of ``iterator_a`` and ``iterator_b``
        and passed through ``g_a`` / ``g_b`` once and twice. Each of the six
        results is written as a ``.jpg`` under ``<dst>/image`` and, after
        ``dataset.reverse`` and ``gla.inverse``, as a ``.wav`` under
        ``<dst>/preview``; file names start with the current epoch.
        """
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            x_a = chainer.Variable(
                convert.concat_examples(iterator_a.next(), device))
            x_b = chainer.Variable(
                convert.concat_examples(iterator_b.next(), device))

            # single conversions
            x_ab = g_a(x_a)
            x_ba = g_b(x_b)

            # converted back again
            x_bab = g_a(x_ba)
            x_aba = g_b(x_ab)

            preview_dir = '{}/preview'.format(dst)
            if not os.path.exists(preview_dir):
                os.makedirs(preview_dir)
            image_dir = '{}/image'.format(dst)
            if not os.path.exists(image_dir):
                os.makedirs(image_dir)

            tagged = (('a', x_a), ('ab', x_ab), ('aba', x_aba),
                      ('b', x_b), ('ba', x_ba), ('bab', x_bab))
            for tag, variable in tagged:
                # strip ``padding`` entries from both ends of axis 2 and
                # flatten to a single 128-wide picture
                host = cp.asnumpy(variable.data)
                picture = host[:, :, padding:-padding, :].reshape(1, -1, 128)
                image.save(
                    image_dir + '/{}{}.jpg'.format(trainer.updater.epoch, tag),
                    picture)
                wave = np.concatenate(
                    [gla.inverse(frame) for frame in dataset.reverse(picture)])
                dataset.save(
                    preview_dir +
                    '/{}{}.wav'.format(trainer.updater.epoch, tag),
                    16000, wave)
示例#22
0
        def make_image(trainer):
            """Save a preview grid of inputs and their converted counterparts.

            One batch from each iterator is converted by ``generator_ab`` /
            ``generator_ba`` in test mode. The four batches (a, a->b, b,
            b->a) are tiled into a 4-row by 10-column grid of 64x64 RGB
            images and written to ``<dst>/preview/image<epoch>.png``.
            """
            def load(iterator):
                # next batch as a volatile variable on ``device``
                arrays = convert.concat_examples(iterator.next(), device)
                return chainer.Variable(arrays, volatile='on')

            # read data
            x_a = load(iterator_a)
            x_b = load(iterator_b)

            # conversion
            x_ab = generator_ab(x_a, test=True)
            x_ba = generator_ba(x_b, test=True)

            # to cpu, in display order: a, a->b, b, b->a
            rows = [chainer.cuda.to_cpu(v.data)
                    for v in (x_a, x_ab, x_b, x_ba)]

            # (4*10, 3, 64, 64) -> (4*64, 10*64, 3) image grid
            grid = np.concatenate(rows, 0)
            grid = grid.reshape(4, 10, 3, 64, 64)
            grid = grid.transpose(0, 3, 1, 4, 2)
            grid = grid.reshape((4 * 64, 10 * 64, 3))

            # shift and scale towards [0, 255], then clip
            grid += 1
            grid *= (255 / 2)
            pixels = np.asarray(np.clip(grid, 0, 255), dtype=np.uint8)

            preview_dir = '{}/preview'.format(dst)
            preview_path = preview_dir +\
                '/image{:0>5}.png'.format(trainer.updater.epoch)
            if not os.path.exists(preview_dir):
                os.makedirs(preview_dir)
            Image.fromarray(pixels).save(preview_path)
示例#23
0
    def train(x):
        """Train an ``MLP`` with the given hyper-parameters.

        Args:
            x: Triple ``(lr, momentum, h_units)``.

        Returns:
            Mean of the per-batch accuracies on ``test_iter`` measured after
            the last of the 50 epochs.
        """
        lr, momentum, h_units = x
        epochs = 50
        stepsizes = [40]  # epochs after which the learning rate is scaled
        gamma = 0.1

        model = MLP(h_units)
        if args.gpu > -1:
            model.to_gpu()
        optimizer = chainer.optimizers.MomentumSGD(lr=lr, momentum=momentum)
        optimizer.setup(model)

        test_accuracy = 0
        for epoch in range(epochs):
            train_iter.reset()

            # training pass over a copy of the shared iterator
            train_scores = []
            for batch in copy.copy(train_iter):
                inputs, targets = concat_examples(batch, device=device)
                optimizer.update(model, inputs, targets)
                train_scores.append(float(model.accuracy.data))
            train_accuracy = np.mean(train_scores)

            # test pass over a copy of the shared iterator
            test_scores = []
            for batch in copy.copy(test_iter):
                inputs, targets = concat_examples(batch, device=device)
                model(inputs, targets)
                test_scores.append(float(model.accuracy.data))

            if (epoch + 1) in stepsizes:
                optimizer.lr *= gamma
            test_accuracy = np.mean(test_scores)
            # print(epoch, train_accuracy, test_accuracy)

        return test_accuracy
示例#24
0
    def extract(self, images, layers=None, size=(224, 224), **kwargs):
        """extract(self, images, layers=['pool5'], size=(224, 224))

        Extracts all the feature maps of given images.

        Compared with calling ``__call__`` directly, this method accepts raw
        images and turns them into a proper variable itself, i.e. it is a
        shortcut that implicitly calls ``prepare`` and then ``__call__``.

        .. warning::

           ``train`` and ``volatile`` arguments are not supported anymore since
           v2.
           Instead, use ``chainer.using_config('train', train)`` and
           ``chainer.using_config('enable_backprop', not volatile)``
           respectively.
           See :func:`chainer.using_config`.

        Args:
            images (iterable of PIL.Image or numpy.ndarray): Input images.
            layers (list of str): The list of layer names you want to extract.
                ``None`` selects ``['pool5']``.
            size (pair of ints): The resolution of resized images used as
                an input of CNN. All the given images are not resized
                if this argument is ``None``, but the resolutions of
                all the images should be the same.

        Returns:
            Dictionary of ~chainer.Variable: A dictionary in which
            the key contains the layer name and the value contains
            the corresponding feature map variable.

        """
        # Reject the removed ``train`` / ``volatile`` keywords with a
        # dedicated message, and any other stray keyword generically.
        argument.check_unexpected_kwargs(
            kwargs,
            train='train argument is not supported anymore. '
            'Use chainer.using_config',
            volatile='volatile argument is not supported anymore. '
            'Use chainer.using_config')
        argument.assert_kwargs_empty(kwargs)

        selected = ['pool5'] if layers is None else layers

        prepared = [prepare(image, size=size) for image in images]
        batch = Variable(self.xp.asarray(concat_examples(prepared)))
        return self(batch, layers=selected)
示例#25
0
def eval(loss_function, iterator):
    """
    Averages a loss function over every batch produced by an iterator.

    The iterator is reset first. Each batch is concatenated into arrays,
    passed to the loss function as positional arguments, and the ``data``
    of the result is collected. The return value is the plain
    (unweighted) mean of those per-batch values.

    :param loss_function: The loss function to evaluate
    :param iterator: The iterator over the evaluation data set
    :return: The mean loss value
    """
    iterator.reset()
    batch_losses = [
        loss_function(*convert.concat_examples(batch)).data
        for batch in iterator
    ]
    return np.mean(batch_losses)
示例#26
0
def fact_pad_concat_convert(fact_batch, device, test=False):
    """Converts a batch of facts into padded head/relation/tail/label blocks.

    When ``test`` is exactly ``True`` the labels found in the batch are
    converted and returned as they are. Otherwise negative examples are
    generated: an independently shuffled copy of each of the head,
    relation and tail blocks is appended to the original block, and the
    labels become ones (for the original rows) followed by zeros (for
    the shuffled rows).

    Args:
        fact_batch: List of tuples of heads, relations, tails, and labels.
        device: Device ID to which an array is sent.
        test: If it is test, this flag should be true.

    Returns:
        Tuple ``(h_block, r_block, t_block, y_block)`` of converted arrays.

    """
    heads, relations, tails, labels = zip(*fact_batch)

    h_block, r_block, t_block = (
        convert.concat_examples(column, device, padding=PAD)
        for column in (heads, relations, tails))

    if test is True:
        y_block = convert.concat_examples(labels, device, padding=PAD)
        return (h_block, r_block, t_block, y_block)

    # add negative examples; the shuffles are drawn in head, relation,
    # tail order
    xp = cuda.get_array_module(h_block)
    augmented = []
    for positive in (h_block, r_block, t_block):
        negative = positive.copy()
        xp.random.shuffle(negative)
        augmented.append(xp.concatenate((positive, negative)))
    h_block, r_block, t_block = augmented

    n_facts = len(heads)
    targets = xp.concatenate((xp.ones(n_facts, 'i'), xp.zeros(n_facts, 'i')))
    y_block = convert.concat_examples(targets, device, padding=PAD)
    return (h_block, r_block, t_block, y_block)
示例#27
0
 def evaluate(model, iter):
     """Returns ``exp`` of the mean per-batch loss of ``model`` over ``iter``.

     Evaluation runs on a copy of the model whose predictor state has
     been reset, and on a shallow copy of the iterator. The predictor's
     ``train`` flag is switched off while evaluating and switched back on
     for ``model`` before returning.
     """
     model.predictor.train = False
     evaluator = model.copy()  # separate object so the caller's state is kept
     evaluator.predictor.reset_state()  # start from a fresh state
     evaluator.predictor.train = False  # dropout does nothing
     total_loss = 0
     n_batches = 0
     for n_batches, batch in enumerate(copy.copy(iter), start=1):
         x, t = convert.concat_examples(batch, args.gpu)
         total_loss += evaluator(x, t).data
     model.predictor.train = True
     return np.exp(float(total_loss) / n_batches)
示例#28
0
def seq2seq_pad_concat_convert(xy_batch, device, eos_id=0, bos_id=2):
    """Builds padded source, decoder-input and decoder-output blocks.

    Source and target sequences are first concatenated into blocks padded
    with ``-1``. Then:

    * the source block gets ``eos_id`` written right after each sequence
      and a leading ``bos_id`` column;
    * the decoder-output block is the target block with ``eos_id`` written
      right after each sequence;
    * the decoder-input block is the target block with a leading
      ``bos_id`` column.

    Args:
        xy_batch: Iterable of ``(source_sequence, target_sequence)`` pairs.
        device: Device specifier forwarded to ``convert.concat_examples``.
        eos_id: Value written at the end of each sequence.
        bos_id: Value used for the leading column.

    Returns:
        Tuple ``(x_block, y_in_block, y_out_block)``.
    """
    sources, targets = zip(*xy_batch)

    src_block = convert.concat_examples(sources, device, padding=-1)
    tgt_block = convert.concat_examples(targets, device, padding=-1)
    xp = cuda.get_array_module(src_block)

    def append_eos(block, seqs):
        # Add one padding column, then mark the end of every sequence.
        widened = xp.pad(
            block, ((0, 0), (0, 1)), 'constant', constant_values=-1)
        for row, seq in enumerate(seqs):
            widened[row, len(seq)] = eos_id
        return widened

    def prepend_bos(block):
        # Add one leading column filled with the begin-of-sequence id.
        return xp.pad(
            block, ((0, 0), (1, 0)), 'constant', constant_values=bos_id)

    x_block = prepend_bos(append_eos(src_block, sources))
    y_out_block = append_eos(tgt_block, targets)
    y_in_block = prepend_bos(tgt_block)
    return (x_block, y_in_block, y_out_block)
示例#29
0
 def __evaluate(self, data):
     """Returns the example-weighted mean loss and accuracy over ``data``.

     ``data`` is traversed once, in order, in batches of
     ``self.batch_size`` with both ``enable_backprop`` and ``train``
     configured to ``False``. Each batch's loss and accuracy are weighted
     by the batch length before averaging.

     Raises:
         ZeroDivisionError: If ``data`` yields no examples.
     """
     iterator = chainer.iterators.SerialIterator(
         data, self.batch_size, repeat=False, shuffle=False)
     loss_sum = 0
     acc_sum = 0
     seen = 0
     with chainer.using_config('enable_backprop', False), \
             chainer.using_config('train', False):
         for batch in iterator:
             x_batch, y_batch = convert.concat_examples(batch, self.device_id)
             loss, acc = self.__forward(x_batch, y_batch)
             batch_len = len(x_batch)
             loss_sum += float(loss.data) * batch_len
             acc_sum += float(acc.data) * batch_len
             seen += batch_len
     iterator.finalize()
     return loss_sum / seen, acc_sum / seen
示例#30
0
 def evaluate(model, iter):
     """Computes ``exp(mean batch loss)`` for ``model`` on ``iter``.

     A copy of the model (with reset predictor state) does the work, fed
     from a shallow copy of the iterator. The predictor's ``train`` flag
     is off during the pass and set to ``True`` on ``model`` afterwards.
     """
     model.predictor.train = False
     scorer = model.copy()  # to use different state
     scorer.predictor.reset_state()  # initialize state
     scorer.predictor.train = False  # dropout does nothing
     loss_sum = 0
     batches_seen = 0
     for batches_seen, minibatch in enumerate(copy.copy(iter), start=1):
         inputs, targets = convert.concat_examples(minibatch, args.gpu)
         batch_loss = scorer(inputs, targets)
         loss_sum += batch_loss.data
     model.predictor.train = True
     mean_loss = float(loss_sum) / batches_seen
     return np.exp(mean_loss)
示例#31
0
    def extract(self, images, layers=None, size=(224, 224), **kwargs):
        """extract(self, images, layers=['pool5'], size=(224, 224))

        Extracts feature maps for the given images.

        This is a shortcut for calling ``prepare`` on every image,
        stacking the results into a single batch variable, and then
        invoking ``__call__`` on it.

        .. warning::

           ``train`` and ``volatile`` arguments are not supported anymore since
           v2.
           Instead, use ``chainer.using_config('train', train)`` and
           ``chainer.using_config('enable_backprop', not volatile)``
           respectively.
           See :func:`chainer.using_config`.

        Args:
            images (iterable of PIL.Image or numpy.ndarray): Input images.
            layers (list of str): The layer names to extract; ``None``
                means ``['pool5']``.
            size (pair of ints): The resolution of resized images used as
                an input of CNN. If ``None``, images are not resized and
                must all have the same resolution.

        Returns:
            Dictionary of ~chainer.Variable: A dictionary mapping each
            requested layer name to its feature map variable.

        """

        wanted = layers
        if wanted is None:
            wanted = ['pool5']

        train_msg = ('train argument is not supported anymore. '
                     'Use chainer.using_config')
        volatile_msg = ('volatile argument is not supported anymore. '
                        'Use chainer.using_config')
        argument.check_unexpected_kwargs(
            kwargs, train=train_msg, volatile=volatile_msg)
        argument.assert_kwargs_empty(kwargs)

        stacked = concat_examples(
            [prepare(image, size=size) for image in images])
        inputs = Variable(self.xp.asarray(stacked))
        return self(inputs, layers=wanted)
示例#32
0
 def evaluate(model, iter):
     """Returns ``exp`` of the mean per-batch loss of ``model`` over ``iter``.

     Runs on a copy of the model with reset predictor state and on a
     shallow copy of the iterator, with ``train`` and ``enable_backprop``
     both configured to ``False``.
     """
     evaluator = model.copy()  # keep the caller's recurrent state untouched
     evaluator.predictor.reset_state()  # initialize state
     total_loss = 0
     n_batches = 0
     # Evaluation mode; disabling backprop is optional but reduces overhead.
     with configuration.using_config('train', False), \
             chainer.using_config('enable_backprop', False):
         for n_batches, batch in enumerate(copy.copy(iter), start=1):
             x, t = convert.concat_examples(batch, args.gpu)
             total_loss += evaluator(x, t).array
     return np.exp(float(total_loss) / n_batches)
示例#33
0
    def __call__(self, trainer):
        """Reports recall@k of nearest-neighbour retrieval on saved features.

        Loads the feature matrix stored as ``self.features_file`` inside
        ``trainer.out`` and gathers, from a copy of ``self.iterator``, the
        second array of every concatenated batch (assumed to be the class
        labels - TODO confirm against the dataset). ``self.nbrs`` is fitted
        on all features; then, one window of ``self.window_size`` rows at a
        time, the ``self.max_k + 1`` nearest neighbours of every row are
        looked up and the first one is dropped (presumably the query
        itself, since queries come from the fitted set). A row is a hit at
        ``k`` when any of its first ``k`` remaining neighbours has the same
        class. For each ``k`` in ``self.ks`` the hit rate is reported as
        ``recall@k`` on the target of the ``'main'`` optimizer.

        Returns immediately when the module-level ``_available`` flag is
        false.

        Args:
            trainer: Trainer providing the ``out`` directory and the
                updater that owns the ``'main'`` optimizer.

        Raises:
            ValueError: If the number of feature rows and the number of
                collected class entries differ.
        """
        if not _available:
            return
        features_file = osp.join(trainer.out, self.features_file)

        iterator = copy.copy(self.iterator)
        features = np.load(features_file)
        optimizer = trainer.updater.get_optimizer('main')

        classes = []
        for v in iterator:
            arrays = convert.concat_examples(v)
            classes.append(arrays[1])
        classes = np.concatenate(classes, axis=0)

        if features.shape[0] != classes.shape[0]:
            raise ValueError(
                'batch size of features and the class array differ')

        n = features.shape[0]
        n_match = {k: [] for k in self.ks}

        self.nbrs.fit(features)
        for start in six.moves.range(0, n, self.window_size):
            # ``start`` already advances by ``window_size``; multiplying
            # it by the window size again (as the previous code did) made
            # every window after the first point at the wrong rows.
            the_slice = slice(start, start + self.window_size)
            src_features = features[the_slice]
            src_classes = classes[the_slice]

            indices = self.nbrs.kneighbors(
                src_features, n_neighbors=self.max_k + 1,
                return_distance=False)
            indices = indices[:, 1:]  # drop the closest neighbour
            knbr_classes = classes[indices]  # (window, max_k)

            match = knbr_classes == src_classes[:, None]

            for k in self.ks:
                # True where at least one of the first k neighbours matches.
                n_match_k = np.any(match[:, :k], axis=1)
                n_match[k].append(n_match_k)

        for k in self.ks:
            n_match[k] = np.concatenate(n_match[k])

            reporter.report({'recall@{}'.format(k): np.mean(n_match[k])},
                            optimizer.target)
示例#34
0
 def evaluate(model, iter, bproplen=100):
     """Returns ``exp`` of the mean per-batch loss of ``model`` over ``iter``.

     A copy of the model is called as ``evaluator(state, x, t)`` and its
     returned state is threaded from batch to batch, starting from
     ``None``. On every ``bproplen``-th batch (counting from zero) the
     computational graph is cut with ``unchain_backward``. The
     predictor's ``train`` flag is off during the pass and restored to
     ``True`` on ``model`` before returning.
     """
     model.predictor.train = False
     evaluator = model.copy()  # to use different state
     state = None
     evaluator.predictor.train = False  # dropout does nothing
     total_loss = 0
     n_batches = 0
     for step, batch in enumerate(copy.copy(iter)):
         x, t = convert.concat_examples(batch, gpu_id)
         state, loss = evaluator(state, x, t)
         total_loss += loss.data
         if step % bproplen == 0:
             loss.unchain_backward()  # Truncate the graph
         n_batches = step + 1
     model.predictor.train = True
     return np.exp(float(total_loss) / n_batches)
示例#35
0
    def test_realvideo(self):
        """Checks input and output shapes of the discriminator on real video.

        Builds a dataset from every entry of ``self.dataset`` whose name
        contains ``'mp4'``, takes one batch, and asserts both the batch
        shape and the shape of the discriminator output.
        """
        dis = Discriminator()

        video_files = [
            name for name in os.listdir(self.dataset) if 'mp4' in name]

        train = PreprocessedDataset(paths=video_files, root=self.dataset)
        batch = chainer.iterators.SerialIterator(train, self.batchsize).next()

        x_real = Variable(convert.concat_examples(batch, self.gpu))
        expected_input_shape = (
            self.batchsize, 3, self.frame, self.height, self.width)
        self.assertEqual(expected_input_shape, x_real.shape)

        y_real = dis(x_real)
        expected_output_shape = (self.batchsize, 1)
        self.assertEqual(expected_output_shape, y_real.shape)
示例#36
0
def seq_pad_concat(batch, device):
    """Turns ``(label, word_ids)`` pairs into a padded torch ``Batch``.

    The word-id sequences are padded with ``preprocess.Vocab_Pad.PAD``
    into one block, which is converted to a long tensor and transposed
    (axes 0 and 1 swapped) before being stored. The labels are converted
    to a long tensor as well, and the original sequence lengths are kept
    as a NumPy array.

    Args:
        batch: Iterable of ``(label, word_ids)`` pairs.
        device: Device specifier forwarded to ``convert.concat_examples``.

    Returns:
        ``Batch`` with ``batch_size``, ``word_ids``, ``labels`` and
        ``sent_len`` fields.
    """
    labels, word_ids = zip(*batch)

    padded = convert.concat_examples(
        word_ids, device, padding=preprocess.Vocab_Pad.PAD)
    sent_len = np.array([len(ids) for ids in word_ids])

    def as_long_variable(values):
        # Convert an array-like into a non-differentiable long tensor.
        return Variable(torch.LongTensor(values).type(LONG_TYPE),
                        requires_grad=False)

    block_w = as_long_variable(padded)
    labels = as_long_variable(labels)

    return Batch(batch_size=len(labels),
                 word_ids=block_w.transpose(0, 1).contiguous(),
                 labels=labels,
                 sent_len=sent_len)
示例#37
0
    def validation_measure(data_index, which_measure):
        """Scores the model on one validation set with the chosen measure.

        Args:
            data_index: Index into ``validation`` / ``data_set_name``.
            which_measure: One of ``'Pred(25)'``, ``'MdAE'``, ``'SA'`` or
                ``'RE*'``.

        Returns:
            The value of the selected ``criteria`` function, or ``None``
            when ``which_measure`` is not one of the names above.
        """
        inputs, targets = convert.concat_examples(validation[data_index])
        predictions = model.forward_2(chainer.Variable(inputs)).data

        # For these data sets both targets and predictions are
        # exponentiated before scoring (presumably they are stored in log
        # scale - TODO confirm).
        if data_set_name[data_index] in ['cocnas', 'maxwell', 'opens']:
            predictions = np.power(math.e, predictions)
            targets = np.power(math.e, targets)

        if which_measure == 'Pred(25)':
            return criteria.pred25(targets, predictions)
        if which_measure == 'MdAE':
            # NOTE(review): 'MdAE' dispatches to ``criteria.mae`` - confirm
            # this is the intended function.
            return criteria.mae(targets, predictions)
        if which_measure == 'SA':
            return criteria.sa(targets, predictions)
        if which_measure == 'RE*':
            return criteria.re(targets, predictions)
        return None
示例#38
0
 def __evaluate(self, data):
     """Computes mean loss and mean accuracy over all examples in ``data``.

     The data is read once, unshuffled, in batches of ``self.batch_size``
     while ``enable_backprop`` and ``train`` are configured to ``False``.
     Per-batch loss and accuracy are weighted by the batch length, so the
     result is an average over examples rather than over batches.

     Raises:
         ZeroDivisionError: If ``data`` yields no examples.
     """
     iterator = chainer.iterators.SerialIterator(
         data, self.batch_size, repeat=False, shuffle=False)
     weighted_loss = 0
     weighted_acc = 0
     n_examples = 0
     no_backprop = chainer.using_config('enable_backprop', False)
     eval_mode = chainer.using_config('train', False)
     with no_backprop, eval_mode:
         for minibatch in iterator:
             inputs, targets = convert.concat_examples(
                 minibatch, self.device_id)
             loss, acc = self.__forward(inputs, targets)
             size = len(inputs)
             weighted_loss += float(loss.data) * size
             weighted_acc += float(acc.data) * size
             n_examples += size
     iterator.finalize()
     return weighted_loss / n_examples, weighted_acc / n_examples
示例#39
0
文件: lm.py 项目: zqs01/espnet
 def evaluate(self):
     """Reports the validation loss averaged over non-zero input entries.

     Every batch from a copy of the ``'main'`` iterator is concatenated
     with padding ``0`` for inputs and ``-1`` for targets. The target is
     called column by column as ``target(state, x[:, i], t[:, i])`` with
     the state restarted from ``None`` for each batch. Each column's loss
     is weighted by the number of non-zero inputs in that column.

     Returns:
         Observation dict holding the reported ``'loss'`` for the target.
     """
     val_iter = self.get_iterator('main')
     target = self.get_target('main')
     total_loss = 0
     n_tokens = 0
     for batch in copy.copy(val_iter):
         x, t = convert.concat_examples(
             batch, device=self.device, padding=(0, -1))
         xp = chainer.backends.cuda.get_array_module(x)
         state = None
         for step in six.moves.range(len(x[0])):
             column = x[:, step]
             state, loss_batch = target(state, column, t[:, step])
             valid = xp.count_nonzero(column)
             total_loss += loss_batch.data * valid
             n_tokens += int(valid)
     # report validation loss
     observation = {}
     with reporter.report_scope(observation):
         reporter.report({'loss': float(total_loss / n_tokens)}, target)
     return observation
示例#40
0
def main():
    """Trains ``Net(100, 10)`` on MNIST with Adam and logs once per epoch.

    After every completed epoch the loss and accuracy on the whole
    training set and the accuracy on the whole test set are printed as a
    tab-separated row.
    """
    # config
    max_epoch = 20
    batchsize = 100

    # Model and optimizer
    model = Net(100, 10)
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Data
    train, test = chainer.datasets.get_mnist()
    train_iter = chainer.iterators.SerialIterator(train, batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test, batchsize, repeat=False, shuffle=False)

    # print header
    print("Epoch\tloss(train)\taccuracy(train)\taccuracy(test)")

    # train
    while train_iter.epoch < max_epoch:
        x, t = convert.concat_examples(train_iter.next())

        model.cleargrads()
        loss = model.loss(x, t)
        loss.backward()
        optimizer.update()

        # log only when an epoch has just been completed
        if not train_iter.is_new_epoch:
            continue

        # metrics are computed on the full underlying arrays of each set
        loss_train, accuracy_train = model.loss_with_accuracy(
            *train._datasets)
        _, accuracy_test = model.loss_with_accuracy(*test._datasets)

        row = (train_iter.epoch, loss_train.data,
               accuracy_train, accuracy_test)
        print("%d\t%f\t%f\t%f" % row)
示例#41
0
    def evaluate(model, iter):
        """Returns ``exp`` of the summed loss divided by the batch count.

        The iterator is reset and drained, collecting every batch's words
        and labels. A copy of the model (with reset ``rnn`` state) is then
        called once on the whole list of word batches, and softmax cross
        entropy is computed between each output and the label batch at the
        same position. All of this runs with ``train`` configured to
        ``False``.
        """
        evaluator = model.copy()  # to use different state
        evaluator.rnn.reset_state()  # initialize state
        total_loss = 0
        words = []
        labels = []
        lossfun = softmax_cross_entropy.softmax_cross_entropy
        with configuration.using_config('train', False):
            iter.reset()
            for batch in iter:
                word, label = convert.concat_examples(batch, args.gpu)
                words.append(word)
                labels.append(label)
            outputs = evaluator(words)

            for position in range(len(outputs)):
                batch_loss = lossfun(outputs[position], labels[position])
                total_loss += batch_loss.array
        data_count = len(words)
        return np.exp(float(total_loss) / data_count)
示例#42
0
def main():
    """Train an MLP classifier on MNIST with a hand-written training loop.

    Parses the command line, builds ``L.Classifier(train_mnist.MLP(...))``
    with an Adam optimizer, optionally restores both from ``--resume``,
    and then alternates one training epoch with one full pass over the
    test set, printing the mean loss and accuracy of each. Once training
    ends, the model and the optimizer are saved under ``--out``.
    """
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--device', '-d', type=str, default='-1',
                        help='Device specifier. Either ChainerX device '
                        'specifier or an integer. If non-negative integer, '
                        'CuPy arrays with specified device id are used. If '
                        'negative integer, NumPy arrays are used')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot using model '
                             'and state files in the specified directory')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    group = parser.add_argument_group('deprecated arguments')
    group.add_argument('--gpu', '-g', type=int, nargs='?', const=0,
                       help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    # ``parse_device`` is defined elsewhere; presumably it reconciles the
    # deprecated ``--gpu`` flag with ``--device`` - TODO confirm.
    device = parse_device(args)

    print('Device: {}'.format(device))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train
    model = L.Classifier(train_mnist.MLP(args.unit, 10))
    model.to_device(device)
    device.use()

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    if args.resume:
        # Resume from a snapshot
        serializers.load_npz('{}/mlp.model'.format(args.resume), model)
        serializers.load_npz('{}/mlp.state'.format(args.resume), optimizer)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist()

    # Dataset sizes are the denominators of the per-epoch means below.
    train_count = len(train)
    test_count = len(test)

    with SerialIterator(train, args.batchsize) as train_iter, \
        SerialIterator(
            test, args.batchsize, repeat=False, shuffle=False) as test_iter:

        # Running sums, weighted by batch length; reused for both the
        # training and the evaluation phase of each epoch.
        sum_accuracy = 0
        sum_loss = 0

        while train_iter.epoch < args.epoch:
            batch = train_iter.next()
            x, t = convert.concat_examples(batch, device)
            optimizer.update(model, x, t)
            sum_loss += float(model.loss.array) * len(t)
            sum_accuracy += float(model.accuracy.array) * len(t)

            # True right after the batch that completed an epoch.
            if train_iter.is_new_epoch:
                print('epoch: {}'.format(train_iter.epoch))
                print('train mean loss: {}, accuracy: {}'.format(
                    sum_loss / train_count, sum_accuracy / train_count))
                # evaluation
                sum_accuracy = 0
                sum_loss = 0
                # Enable evaluation mode.
                with configuration.using_config('train', False):
                    # This is optional but can reduce computational overhead.
                    with chainer.using_config('enable_backprop', False):
                        for batch in test_iter:
                            x, t = convert.concat_examples(batch, device)
                            loss = model(x, t)
                            sum_loss += float(loss.array) * len(t)
                            sum_accuracy += float(
                                model.accuracy.array) * len(t)

                # The test iterator does not repeat, so rewind it for the
                # next epoch's evaluation.
                test_iter.reset()
                print('test mean  loss: {}, accuracy: {}'.format(
                    sum_loss / test_count, sum_accuracy / test_count))
                # Start the next epoch's training statistics from zero.
                sum_accuracy = 0
                sum_loss = 0

        # Save the model and the optimizer
        # NOTE(review): nothing in this function creates ``args.out``;
        # saving presumably fails if the directory does not exist - confirm.
        print('save the model')
        serializers.save_npz('{}/mlp.model'.format(args.out), model)
        print('save the optimizer')
        serializers.save_npz('{}/mlp.state'.format(args.out), optimizer)
示例#43
0
def main():
    """Train a VGG classifier on CIFAR-10/100 with a hand-written loop.

    Parses the command line, loads the selected dataset, and trains
    ``L.Classifier(models.VGG.VGG(class_labels))`` with momentum SGD and
    weight decay. After every epoch the mean training loss and accuracy
    are printed, followed by the same statistics on the test set. The
    model and the optimizer are saved to ``mlp.model`` and ``mlp.state``
    once training ends.
    """
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    args = parser.parse_args()
    # NOTE(review): ``args.out`` and ``args.resume`` are parsed but never
    # read anywhere in this function.

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    if args.test:
        # Keep only the first 200 examples of each split.
        train = train[:200]
        test = test[:200]

    # Dataset sizes are the denominators of the per-epoch means below.
    train_count = len(train)
    test_count = len(test)

    model = L.Classifier(models.VGG.VGG(class_labels))
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Running sums, weighted by batch length; reused for both the
    # training and the evaluation phase of each epoch.
    sum_accuracy = 0
    sum_loss = 0

    while train_iter.epoch < args.epoch:
        batch = train_iter.next()
        # Reduce learning rate by 0.5 every 25 epochs.
        if train_iter.epoch % 25 == 0 and train_iter.is_new_epoch:
            optimizer.lr *= 0.5
            print('Reducing learning rate to: ', optimizer.lr)

        x_array, t_array = convert.concat_examples(batch, args.gpu)
        x = chainer.Variable(x_array)
        t = chainer.Variable(t_array)
        optimizer.update(model, x, t)
        sum_loss += float(model.loss.data) * len(t.data)
        sum_accuracy += float(model.accuracy.data) * len(t.data)

        # True right after the batch that completed an epoch.
        if train_iter.is_new_epoch:
            print('epoch: ', train_iter.epoch)
            print('train mean loss: {}, accuracy: {}'.format(
                sum_loss / train_count, sum_accuracy / train_count))
            # evaluation
            sum_accuracy = 0
            sum_loss = 0
            # Presumably the predictor reads this flag to switch off
            # train-only behaviour - TODO confirm against the VGG model.
            model.predictor.train = False
            for batch in test_iter:
                x_array, t_array = convert.concat_examples(batch, args.gpu)
                x = chainer.Variable(x_array)
                t = chainer.Variable(t_array)
                loss = model(x, t)
                sum_loss += float(loss.data) * len(t.data)
                sum_accuracy += float(model.accuracy.data) * len(t.data)

            # The test iterator does not repeat, so rewind it for the next
            # epoch's evaluation.
            test_iter.reset()
            model.predictor.train = True
            print('test mean  loss: {}, accuracy: {}'.format(
                sum_loss / test_count, sum_accuracy / test_count))
            # Start the next epoch's training statistics from zero.
            sum_accuracy = 0
            sum_loss = 0

    # Save the model and the optimizer
    # The files are written with relative names, i.e. not under ``--out``.
    print('save the model')
    serializers.save_npz('mlp.model', model)
    print('save the optimizer')
    serializers.save_npz('mlp.state', optimizer)
示例#44
0
def main():
    """Train a VAE on MNIST with a hand-written loop and save sample images.

    Parses the command line, builds ``net.VAE(784, args.dimz, 500)`` with
    an Adam optimizer (optionally initialised from ``--initmodel`` and
    ``--resume``), and trains on unlabeled MNIST. After every epoch the
    mean loss and mean reconstruction loss over the whole training set are
    printed, followed by the same statistics on the test set. Once
    training ends, the model and the optimizer are saved under ``--out``
    together with images of original, reconstructed and randomly sampled
    digits.
    """
    parser = argparse.ArgumentParser(description='Chainer example: VAE')
    parser.add_argument('--initmodel', '-m', default='',
                        help='Initialize the model from given file')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the optimization from snapshot')
    parser.add_argument('--gpu', '-g', default=-1, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--epoch', '-e', default=100, type=int,
                        help='number of epochs to learn')
    parser.add_argument('--dimz', '-z', default=20, type=int,
                        help='dimention of encoded vector')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='learning minibatch size')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# dim z: {}'.format(args.dimz))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Prepare VAE model, defined in net.py
    model = net.VAE(784, args.dimz, 500)
    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Initialize / Resume
    if args.initmodel:
        chainer.serializers.load_npz(args.initmodel, model)

    if args.resume:
        chainer.serializers.load_npz(args.resume, optimizer)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist(withlabel=False)
    if args.test:
        train, _ = chainer.datasets.split_dataset(train, 100)
        test, _ = chainer.datasets.split_dataset(test, 100)
    # Dataset sizes are the denominators of the per-epoch means below.
    train_count = len(train)
    test_count = len(test)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # The accumulators span a whole epoch, so they are initialised outside
    # the loop. Resetting them on every iteration (as the previous code
    # did) left only the last mini-batch in the printed training means.
    sum_loss = 0
    sum_rec_loss = 0

    while train_iter.epoch < args.epoch:
        batch = train_iter.next()
        x_array = convert.concat_examples(batch, args.gpu)
        x = chainer.Variable(x_array)
        # Update model based on the loss function
        # defined by model.get_loss_func()
        optimizer.update(model.get_loss_func(), x)

        sum_loss += float(model.loss.data) * len(x.data)
        sum_rec_loss += float(model.rec_loss.data) * len(x.data)

        # True right after the batch that completed an epoch.
        if train_iter.is_new_epoch:
            print('train mean loss={}, mean reconstruction loss={}'
                  .format(sum_loss / train_count, sum_rec_loss / train_count))

            # evaluation
            sum_loss = 0
            sum_rec_loss = 0
            for batch in test_iter:
                x_array = convert.concat_examples(batch, args.gpu)
                x = chainer.Variable(x_array)
                loss_func = model.get_loss_func(k=10)
                loss_func(x)
                sum_loss += float(model.loss.data) * len(x.data)
                sum_rec_loss += float(model.rec_loss.data) * len(x.data)

            # The test iterator does not repeat, so rewind it for the next
            # epoch's evaluation.
            test_iter.reset()
            print('test mean loss={}, mean reconstruction loss={}'
                  .format(sum_loss / test_count, sum_rec_loss / test_count))

            # Start the next epoch's training statistics from zero.
            sum_loss = 0
            sum_rec_loss = 0

    # Note that os.makedirs(path, exist_ok=True) can be used
    # if this script only supports python3
    if not os.path.exists(args.out):
        os.mkdir(args.out)

    # Save the model and the optimizer
    print('save the model')
    chainer.serializers.save_npz(
        os.path.join(args.out, 'mlp.model'), model)
    print('save the optimizer')
    chainer.serializers.save_npz(
        os.path.join(args.out, 'mlp.state'), optimizer)

    # Visualize the results
    def save_images(x, filename):
        # Draw up to nine 28x28 images on a 3x3 grid and save the figure.
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(3, 3, figsize=(9, 9), dpi=100)
        for ai, xi in zip(ax.flatten(), x):
            ai.imshow(xi.reshape(28, 28))
        fig.savefig(filename)

    # The visualisation below works on NumPy arrays.
    model.to_cpu()
    train_ind = [1, 3, 5, 10, 2, 0, 13, 15, 17]
    x = chainer.Variable(np.asarray(train[train_ind]))
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        x1 = model(x)
    save_images(x.data, os.path.join(args.out, 'train'))
    save_images(x1.data, os.path.join(args.out, 'train_reconstructed'))

    test_ind = [3, 2, 1, 18, 4, 8, 11, 17, 61]
    x = chainer.Variable(np.asarray(test[test_ind]))
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        x1 = model(x)
    save_images(x.data, os.path.join(args.out, 'test'))
    save_images(x1.data, os.path.join(args.out, 'test_reconstructed'))

    # draw images from randomly sampled z
    z = chainer.Variable(
        np.random.normal(0, 1, (9, args.dimz)).astype(np.float32))
    x = model.decode(z)
    save_images(x.data, os.path.join(args.out, 'sampled'))
示例#45
0
def main():
    """Train an RNN language model on Penn Tree Bank with a manual loop.

    Parses the command-line options, wraps ``train_ptb.RNNForLM`` in
    ``L.Classifier`` and trains it with SGD, accumulating the loss over
    ``--bproplen`` steps before each update (truncated BPTT).  The training
    perplexity is printed every 20 iterations and the validation perplexity
    after every epoch.  Finally the test perplexity is printed and the model
    and optimizer are saved as ``rnnlm.model`` / ``rnnlm.state`` in ``--out``.

    Raises:
        ValueError: If ``--resume`` is given but the path does not exist.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=20,
                        help='Number of examples in each mini-batch')
    parser.add_argument('--bproplen', '-l', type=int, default=35,
                        help='Number of words in each mini-batch '
                             '(= length of truncated BPTT)')
    parser.add_argument('--epoch', '-e', type=int, default=39,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--gradclip', '-c', type=float, default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    # The help text names the files that are actually loaded below.
    parser.add_argument('--resume', '-r', type=str,
                        help='Directory that has `rnnlm.model`'
                        ' and `rnnlm.state`')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.set_defaults(test=False)
    parser.add_argument('--unit', '-u', type=int, default=650,
                        help='Number of LSTM units in each layer')
    args = parser.parse_args()

    def evaluate(model, iterator):
        """Return the perplexity of ``model`` over one pass of ``iterator``.

        The iterator is reset before use; a copy of the model is evaluated
        so that the recurrent state of ``model`` itself is left untouched.
        """
        evaluator = model.copy()  # to use different state
        evaluator.predictor.reset_state()  # initialize state
        sum_perp = 0
        data_count = 0
        # Enable evaluation mode.
        with configuration.using_config('train', False):
            # This is optional but can reduce computational overhead.
            with chainer.using_config('enable_backprop', False):
                iterator.reset()
                for batch in iterator:
                    x, t = convert.concat_examples(batch, args.gpu)
                    loss = evaluator(x, t)
                    sum_perp += loss.array
                    data_count += 1
        return np.exp(float(sum_perp) / data_count)

    # Load the Penn Tree Bank long word sequence dataset
    train, val, test = chainer.datasets.get_ptb_words()
    n_vocab = max(train) + 1  # train is just an array of integers
    print('#vocab = {}'.format(n_vocab))

    if args.test:
        train = train[:100]
        val = val[:100]
        test = test[:100]

    # Create the dataset iterators
    train_iter = train_ptb.ParallelSequentialIterator(train, args.batchsize)
    val_iter = train_ptb.ParallelSequentialIterator(val, 1, repeat=False)
    test_iter = train_ptb.ParallelSequentialIterator(test, 1, repeat=False)

    # Prepare an RNNLM model
    rnn = train_ptb.RNNForLM(n_vocab, args.unit)
    model = L.Classifier(rnn)
    model.compute_accuracy = False  # we only want the perplexity
    if args.gpu >= 0:
        # Make the specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Set up an optimizer
    optimizer = chainer.optimizers.SGD(lr=1.0)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))

    # Load model and optimizer
    if args.resume is not None:
        resume = args.resume
        if os.path.exists(resume):
            serializers.load_npz(os.path.join(resume, 'rnnlm.model'), model)
            serializers.load_npz(
                os.path.join(resume, 'rnnlm.state'), optimizer)
        else:
            raise ValueError(
                '`args.resume` ("{}") is specified,'
                ' but it does not exist'.format(resume)
            )

    sum_perp = 0
    count = 0
    iteration = 0
    while train_iter.epoch < args.epoch:
        loss = 0
        iteration += 1
        # Progress the dataset iterator for bprop_len words at each iteration.
        for _ in range(args.bproplen):
            # Get the next batch (a list of tuples of two word IDs)
            batch = next(train_iter)
            # Concatenate the word IDs to matrices and send them to the device
            x, t = convert.concat_examples(batch, args.gpu)
            # Compute the loss at this time step and accumulate it
            loss += optimizer.target(chainer.Variable(x), chainer.Variable(t))
            count += 1

        sum_perp += loss.array
        optimizer.target.cleargrads()  # Clear the parameter gradients
        loss.backward()  # Backprop
        loss.unchain_backward()  # Truncate the graph
        optimizer.update()  # Update the parameters

        if iteration % 20 == 0:
            print('iteration: {}'.format(iteration))
            print('training perplexity: {}'.format(
                np.exp(float(sum_perp) / count)))
            # Restart the running average for the next 20 iterations.
            sum_perp = 0
            count = 0

        if train_iter.is_new_epoch:
            print('epoch: {}'.format(train_iter.epoch))
            print('validation perplexity: {}'.format(
                evaluate(model, val_iter)))

    # Evaluate on test dataset
    print('test')
    test_perp = evaluate(model, test_iter)
    print('test perplexity: {}'.format(test_perp))

    # Save the model and the optimizer
    out = args.out
    if not os.path.exists(out):
        os.makedirs(out)
    print('save the model')
    serializers.save_npz(os.path.join(out, 'rnnlm.model'), model)
    print('save the optimizer')
    serializers.save_npz(os.path.join(out, 'rnnlm.state'), optimizer)
示例#46
0
def main():
    """Train an MLP classifier on MNIST with a hand-written training loop.

    Parses the command-line options, builds ``train_mnist.MLP`` wrapped in
    ``L.Classifier``, optionally restores ``mlp.model`` / ``mlp.state`` from
    the ``--resume`` directory, and trains with Adam.  After every epoch the
    mean training and test loss/accuracy are printed.  When training is done
    the model and optimizer are saved into the ``--out`` directory, which is
    created if it does not exist yet.
    """
    # Local import: only this function needs it for the output paths.
    import os

    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot using model '
                             'and state files in the specified directory')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train
    model = L.Classifier(train_mnist.MLP(args.unit, 10))
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    if args.resume:
        # Resume from a snapshot
        serializers.load_npz(os.path.join(args.resume, 'mlp.model'), model)
        serializers.load_npz(
            os.path.join(args.resume, 'mlp.state'), optimizer)

    # Create the output directory up front so that a bad path is reported
    # before training rather than when the results are about to be saved.
    if not os.path.isdir(args.out):
        os.makedirs(args.out)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist()

    train_count = len(train)
    test_count = len(test)

    with MultiprocessIterator(train, args.batchsize) as train_iter, \
        MultiprocessIterator(test, args.batchsize,
                             repeat=False, shuffle=False) as test_iter:

        sum_accuracy = 0
        sum_loss = 0

        while train_iter.epoch < args.epoch:
            batch = train_iter.next()
            x, t = convert.concat_examples(batch, args.gpu)
            optimizer.update(model, x, t)
            # Weight by the batch size so the epoch mean is per example.
            sum_loss += float(model.loss.data) * len(t)
            sum_accuracy += float(model.accuracy.data) * len(t)

            if train_iter.is_new_epoch:
                print('epoch: {}'.format(train_iter.epoch))
                print('train mean loss: {}, accuracy: {}'.format(
                    sum_loss / train_count, sum_accuracy / train_count))
                # evaluation
                sum_accuracy = 0
                sum_loss = 0
                # Enable evaluation mode.
                with configuration.using_config('train', False):
                    # This is optional but can reduce computational overhead.
                    with chainer.using_config('enable_backprop', False):
                        for batch in test_iter:
                            x, t = convert.concat_examples(batch, args.gpu)
                            loss = model(x, t)
                            sum_loss += float(loss.data) * len(t)
                            sum_accuracy += float(model.accuracy.data) * len(t)

                # The test iterator does not repeat; rewind it for next epoch.
                test_iter.reset()
                print('test mean  loss: {}, accuracy: {}'.format(
                    sum_loss / test_count, sum_accuracy / test_count))
                sum_accuracy = 0
                sum_loss = 0

        # Save the model and the optimizer
        print('save the model')
        serializers.save_npz(os.path.join(args.out, 'mlp.model'), model)
        print('save the optimizer')
        serializers.save_npz(os.path.join(args.out, 'mlp.state'), optimizer)
示例#47
0
def main():
    """Train ``Net`` on the dataset from ``make_dataset`` and save one output.

    Parses the command-line options, optionally regenerates the dataset with
    ``Dataset().make()`` when ``--make-dataset`` is given, trains the model
    with Adam while printing the loss of the last batch of every epoch, and
    finally feeds the first training sample through the model and hands the
    result to ``save_result``.
    """
    # get args
    parser = argparse.ArgumentParser(description='ConvolutionNN')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=10,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--make-dataset', '-m', action='store_const',
                        const=True, default=False,
                        help='make dataset in circle_motion directory')
    args = parser.parse_args()

    if args.make_dataset:
        print('Generate dataset.')
        d = Dataset()
        d.make()
        print('Finish. Saved in circle_motion directory.')
        print('--------')
        print('')

    # print config
    print("#GPU       : {}".format(args.gpu))
    print("#batchsize : {}".format(args.batchsize))
    print("#epoch     : {}".format(args.epoch))

    # Model
    model = Net(50, 10)
    # setting for using GPU
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    # Optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Data
    train = make_dataset()
    print("train shape:", train[0][0].shape)
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)

    # print header
    print("Epoch\tloss(train)")

    # train
    while train_iter.epoch < args.epoch:

        # next batch data; send it to the same device as the model
        train_batch = train_iter.next()
        (x, t) = convert.concat_examples(train_batch, args.gpu)

        # calculate loss and optimize params
        model.cleargrads()
        loss = model.forward(x, t)
        loss.backward()
        optimizer.update()

        # log every epoch
        if train_iter.is_new_epoch:
            print("%d\t%f" % (train_iter.epoch, loss.data))

    # Save the generated result
    idx = 0    # index of the sample that is fed through the trained model
    x = np.array([train[idx][0]])
    if args.gpu >= 0:
        # The model lives on the GPU, so its input has to be there as well.
        x = chainer.cuda.to_gpu(x)
    y = model(x)
    if args.gpu >= 0:
        y.to_cpu()
    result = y.data[0][0]
    print(result.shape)
    save_result(result)
示例#48
0
def main():
    """Train an MNIST classifier (``MLP`` or ``MLPSideEffect``) manually.

    Parses the command-line options, builds the network selected with
    ``--model`` wrapped in ``L.Classifier``, trains it with Adam and prints
    the mean training and test loss/accuracy after every epoch.  The model
    and optimizer are saved as ``mlp.model`` / ``mlp.state`` in the current
    working directory.

    Raises:
        ValueError: If ``--model`` is neither ``MLP`` nor ``MLPSideEffect``.
    """
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    # NOTE(review): --out and --resume are parsed but never read below.
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--model', '-m', default='MLP',
                        help='Choose the model: MLP or MLPSideEffect')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train
    if args.model == 'MLP':
        model = L.Classifier(train_mnist.MLP(args.unit, 10))
    elif args.model == 'MLPSideEffect':
        model = L.Classifier(train_mnist.MLPSideEffect(args.unit, 10))
    else:
        # Fail with a clear message instead of an unbound ``model`` later on.
        raise ValueError(
            'Unknown model "{}": choose MLP or MLPSideEffect'.format(
                args.model))
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist()

    train_count = len(train)
    test_count = len(test)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    sum_accuracy = 0
    sum_loss = 0

    while train_iter.epoch < args.epoch:
        batch = train_iter.next()
        x_array, t_array = convert.concat_examples(batch, args.gpu)
        x = chainer.Variable(x_array)
        t = chainer.Variable(t_array)
        optimizer.update(model, x, t)
        # Weight by the batch size so the epoch mean is per example.
        sum_loss += float(model.loss.data) * len(t.data)
        sum_accuracy += float(model.accuracy.data) * len(t.data)

        if train_iter.is_new_epoch:
            print('epoch: ', train_iter.epoch)
            print('train mean loss: {}, accuracy: {}'.format(
                sum_loss / train_count, sum_accuracy / train_count))
            # evaluation
            sum_accuracy = 0
            sum_loss = 0
            # It is good practice to turn off train mode during evaluation.
            with configuration.using_config('train', False):
                for batch in test_iter:
                    x_array, t_array = convert.concat_examples(batch, args.gpu)
                    x = chainer.Variable(x_array)
                    t = chainer.Variable(t_array)
                    loss = model(x, t)
                    sum_loss += float(loss.data) * len(t.data)
                    sum_accuracy += float(model.accuracy.data) * len(t.data)

            # The test iterator does not repeat; rewind it for next epoch.
            test_iter.reset()
            print('test mean  loss: {}, accuracy: {}'.format(
                sum_loss / test_count, sum_accuracy / test_count))
            sum_accuracy = 0
            sum_loss = 0

    # Save the model and the optimizer
    print('save the model')
    serializers.save_npz('mlp.model', model)
    print('save the optimizer')
    serializers.save_npz('mlp.state', optimizer)
示例#49
0
def main():
    """Train the unrolled RNN language model on Penn Tree Bank.

    Seeds NumPy and ``random``, parses the command-line options, builds
    ``RNNForLMUnrolled`` and trains it with SGD.  Each iteration feeds
    ``--bproplen`` consecutive batches to the model in one call and sums the
    softmax cross-entropy of every returned output.  Training perplexity is
    printed every 20 iterations, validation perplexity after every epoch and
    test perplexity at the end; the model and optimizer are then written to
    ``rnnlm.model`` / ``rnnlm.state`` in the current working directory.
    """
    np.random.seed(0)
    random.seed(1)

    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=20,
                        help='Number of examples in each mini-batch')
    parser.add_argument('--bproplen', '-l', type=int, default=25,
                        help='Number of words in each mini-batch '
                             '(= length of truncated BPTT)')
    parser.add_argument('--epoch', '-e', type=int, default=39,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--gradclip', '-c', type=float, default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.set_defaults(test=False)
    parser.add_argument('--unit', '-u', type=int, default=650,
                        help='Number of LSTM units in each layer')
    args = parser.parse_args()

    lossfun = softmax_cross_entropy.softmax_cross_entropy

    def step_losses(outputs, labels):
        # Yield one loss per model output, pairing it with the label at the
        # same position; indexed by position to follow the length of outputs.
        for step in range(len(outputs)):
            yield lossfun(outputs[step], labels[step])

    def evaluate(model, data_iter):
        """Return the perplexity of ``model`` over one pass of ``data_iter``."""
        evaluator = model.copy()  # separate copy so the state is independent
        evaluator.rnn.reset_state()  # start from a fresh recurrent state
        sum_perp = 0
        words = []
        labels = []
        with configuration.using_config('train', False):
            data_iter.reset()
            # Gather the whole sequence first; the model takes it in one call.
            for batch in data_iter:
                word, label = convert.concat_examples(batch, args.gpu)
                words.append(word)
                labels.append(label)
            data_count = len(words)
            outputs = evaluator(words)

            for step_loss in step_losses(outputs, labels):
                sum_perp += step_loss.array
        return np.exp(float(sum_perp) / data_count)

    # Load the Penn Tree Bank long word sequence dataset
    train, val, test = chainer.datasets.get_ptb_words()
    n_vocab = max(train) + 1  # the dataset is a flat array of word IDs
    print('#vocab =', n_vocab)

    if args.test:
        train, val, test = train[:100], val[:100], test[:100]

    # Create the dataset iterators
    train_iter = ParallelSequentialIterator(train, args.batchsize)
    val_iter = ParallelSequentialIterator(val, 1, repeat=False)
    test_iter = ParallelSequentialIterator(test, 1, repeat=False)

    # Prepare an RNNLM model
    model = RNNForLMUnrolled(n_vocab, args.unit)
    if args.gpu >= 0:
        # Make the specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Set up an optimizer
    optimizer = chainer.optimizers.SGD(lr=1.0)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))

    sum_perp = 0
    count = 0
    iteration = 0

    while train_iter.epoch < args.epoch:
        iteration += 1
        words = []
        labels = []
        # Pull bproplen consecutive batches (lists of word-ID pairs) and
        # convert each into arrays on the selected device.
        for _ in range(args.bproplen):
            batch = train_iter.__next__()
            word, label = convert.concat_examples(batch, args.gpu)
            words.append(word)
            labels.append(label)
        count += len(words)

        outputs = model(words)

        # Accumulate the loss over all time steps of this chunk.
        loss = 0
        for step_loss in step_losses(outputs, labels):
            loss += step_loss

        sum_perp += loss.array
        optimizer.target.cleargrads()  # Clear the parameter gradients
        loss.backward()  # Backprop
        loss.unchain_backward()  # Truncate the graph
        optimizer.update()  # Update the parameters

        if iteration % 20 == 0:
            print('iteration: ', iteration)
            print('training perplexity: ', np.exp(float(sum_perp) / count))
            # Restart the running average for the next 20 iterations.
            sum_perp = 0
            count = 0

        if train_iter.is_new_epoch:
            print('Evaluating model on validation set...')
            print('epoch: ', train_iter.epoch)
            print('validation perplexity: ', evaluate(model, val_iter))

    # Evaluate on test dataset
    print('test')
    test_perp = evaluate(model, test_iter)
    print('test perplexity:', test_perp)

    # Save the model and the optimizer
    print('save the model')
    serializers.save_npz('rnnlm.model', model)
    print('save the optimizer')
    serializers.save_npz('rnnlm.state', optimizer)
示例#50
0
def main():
    """Train an RNN language model on Penn Tree Bank with a manual loop.

    Parses the command-line options, wraps ``train_ptb.RNNForLM`` in
    ``L.Classifier`` and trains it with SGD, summing the loss over
    ``--bproplen`` steps before every update.  Training perplexity is
    printed every 20 iterations, validation perplexity after every epoch and
    test perplexity at the end; the model and optimizer are then written to
    ``rnnlm.model`` / ``rnnlm.state`` in the current working directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=20,
                        help='Number of examples in each mini-batch')
    parser.add_argument('--bproplen', '-l', type=int, default=35,
                        help='Number of words in each mini-batch '
                             '(= length of truncated BPTT)')
    parser.add_argument('--epoch', '-e', type=int, default=39,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--gradclip', '-c', type=float, default=5,
                        help='Gradient norm threshold to clip')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.set_defaults(test=False)
    parser.add_argument('--unit', '-u', type=int, default=650,
                        help='Number of LSTM units in each layer')
    args = parser.parse_args()

    def evaluate(model, data_iter):
        """Return the perplexity of ``model`` over one pass of ``data_iter``.

        The predictor's ``train`` flag is switched off for the duration of
        the pass and switched back on afterwards.
        """
        model.predictor.train = False
        evaluator = model.copy()  # separate copy so the state is independent
        evaluator.predictor.reset_state()  # start from a fresh state
        evaluator.predictor.train = False  # dropout does nothing
        sum_perp = 0
        data_count = 0
        # Iterate over a copy so the caller's iterator position is kept.
        for batch in copy.copy(data_iter):
            words, targets = convert.concat_examples(batch, args.gpu)
            sum_perp += evaluator(words, targets).data
            data_count += 1
        model.predictor.train = True
        mean_loss = float(sum_perp) / data_count
        return np.exp(mean_loss)

    # Load the Penn Tree Bank long word sequence dataset
    train, val, test = chainer.datasets.get_ptb_words()
    n_vocab = max(train) + 1  # the dataset is a flat array of word IDs
    print('#vocab =', n_vocab)

    if args.test:
        train, val, test = train[:100], val[:100], test[:100]

    # Create the dataset iterators
    train_iter = train_ptb.ParallelSequentialIterator(train, args.batchsize)
    val_iter = train_ptb.ParallelSequentialIterator(val, 1, repeat=False)
    test_iter = train_ptb.ParallelSequentialIterator(test, 1, repeat=False)

    # Prepare an RNNLM model
    model = L.Classifier(train_ptb.RNNForLM(n_vocab, args.unit))
    model.compute_accuracy = False  # we only want the perplexity
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # make the GPU current
        model.to_gpu()

    # Set up an optimizer
    optimizer = chainer.optimizers.SGD(lr=1.0)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))

    sum_perp = 0
    count = 0
    iteration = 0
    while train_iter.epoch < args.epoch:
        loss = 0
        iteration += 1
        # Advance the iterator by bproplen steps, summing the per-step loss.
        for _ in range(args.bproplen):
            # Each batch is a list of tuples of two word IDs.
            batch = train_iter.__next__()
            # Stack the word IDs into arrays on the selected device.
            words, targets = convert.concat_examples(batch, args.gpu)
            step_loss = optimizer.target(
                chainer.Variable(words), chainer.Variable(targets))
            loss += step_loss
            count += 1

        sum_perp += loss.data
        optimizer.target.cleargrads()  # Clear the parameter gradients
        loss.backward()  # Backprop
        loss.unchain_backward()  # Truncate the graph
        optimizer.update()  # Update the parameters

        if iteration % 20 == 0:
            print('iteration: ', iteration)
            print('training perplexity: ', np.exp(float(sum_perp) / count))
            # Restart the running average for the next 20 iterations.
            sum_perp = 0
            count = 0

        if train_iter.is_new_epoch:
            print('epoch: ', train_iter.epoch)
            print('validation perplexity: ', evaluate(model, val_iter))

    # Evaluate on test dataset
    print('test')
    test_perp = evaluate(model, test_iter)
    print('test perplexity:', test_perp)

    # Save the model and the optimizer
    print('save the model')
    serializers.save_npz('rnnlm.model', model)
    print('save the optimizer')
    serializers.save_npz('rnnlm.state', optimizer)
示例#51
0
文件: resnet.py 项目: ktnyt/chainer
    def extract(self, images, layers=None, size=(224, 224), **kwargs):
        """extract(self, images, layers=['pool5'], size=(224, 224))

        Extracts all the feature maps of given images.

        The difference from directly executing ``forward`` is that
        it directly accepts images as an input and automatically
        transforms them to a proper variable. That is,
        it is also interpreted as a shortcut method that implicitly calls
        ``prepare`` and ``forward`` functions.

        Unlike ``predict`` method, this method does not override
        ``chainer.config.train`` and ``chainer.config.enable_backprop``
        configuration. If you want to extract features without updating
        model parameters, you need to manually set configuration when
        calling this method as follows:

         .. code-block:: python

             # model is an instance of ResNetLayers (50 or 101 or 152 layers)
             with chainer.using_config('train', False):
                 with chainer.using_config('enable_backprop', False):
                     feature = model.extract([image])

        .. warning::

           ``test`` and ``volatile`` arguments are not supported
           anymore since v2. Instead, users should configure
           training and volatile modes with ``train`` and
           ``enable_backprop``, respectively.

           Note that default behavior of this method is different
           between v1 and later versions. Specifically,
           the default values of ``test`` in v1 were ``True`` (test mode).
           But that of ``chainer.config.train`` is also ``True``
           (train mode). Therefore, users need to explicitly switch
           ``train`` to ``False`` to run the code in test mode and
           ``enable_backprop`` to ``False`` to turn off
           computational graph construction.

           See the `upgrade guide <https://docs.chainer.org/en/stable\
           /upgrade_v2.html#training-mode-is-configured-by-a-thread-local-flag>`_.

        Args:
            images (iterable of PIL.Image or numpy.ndarray): Input images.
            layers (list of str): The list of layer names you want to extract.
                ``['pool5']`` is used when this argument is ``None``.
            size (pair of ints): The resolution of resized images used as
                an input of CNN. All the given images are not resized
                if this argument is ``None``, but the resolutions of
                all the images should be the same.

        Returns:
            Dictionary of ~chainer.Variable: A dictionary in which
            the key contains the layer name and the value contains
            the corresponding feature map variable.

        """

        # Reject the keyword arguments that were removed in v2 with a hint,
        # then make sure nothing else unexpected was passed.
        if kwargs:
            argument.check_unexpected_kwargs(
                kwargs, test='test argument is not supported anymore. '
                'Use chainer.using_config',
                volatile='volatile argument is not supported anymore. '
                'Use chainer.using_config')
            argument.assert_kwargs_empty(kwargs)

        target_layers = ['pool5'] if layers is None else layers

        # Preprocess every image, stack them into one batch and move the
        # batch to the array module this link uses.
        prepared = [prepare(image, size=size) for image in images]
        batch = concat_examples(prepared)
        inputs = Variable(self.xp.asarray(batch))
        return self(inputs, layers=target_layers)
示例#52
0
def main():
    """Train a VGG classifier on CIFAR-10 or CIFAR-100 with a manual loop.

    Command-line options select the dataset, mini-batch size, learning
    rate, number of epochs, GPU id, output directory, a tiny-dataset test
    mode and an optional directory to resume from.  At the end of every
    epoch the mean training loss/accuracy and the mean test loss/accuracy
    are printed, and the learning rate is halved every 25 epochs.  Once
    training finishes, the model and the optimizer are written to
    ``vgg.model`` and ``vgg.state`` inside the output directory.

    Raises:
        RuntimeError: If ``--dataset`` is neither ``cifar10`` nor
            ``cifar100``.
        ValueError: If ``--resume`` is given but the path does not exist.
    """
    cli = argparse.ArgumentParser(description='Chainer CIFAR example:')
    cli.add_argument(
        '--dataset', '-d', default='cifar10',
        help='The dataset to use: cifar10 or cifar100')
    cli.add_argument(
        '--batchsize', '-b', type=int, default=64,
        help='Number of images in each mini-batch')
    cli.add_argument(
        '--learnrate', '-l', type=float, default=0.05,
        help='Learning rate for SGD')
    cli.add_argument(
        '--epoch', '-e', type=int, default=300,
        help='Number of sweeps over the dataset to train')
    cli.add_argument(
        '--gpu', '-g', type=int, default=0,
        help='GPU ID (negative value indicates CPU)')
    cli.add_argument(
        '--out', '-o', default='result',
        help='Directory to output the result')
    cli.add_argument(
        '--test', action='store_true',
        help='Use tiny datasets for quick tests')
    cli.add_argument(
        '--resume', '-r', type=str,
        help='Directory that has `vgg.model` and `vgg.state`')
    args = cli.parse_args()

    # Echo the run configuration, followed by a blank separator line.
    for line in ('GPU: {}'.format(args.gpu),
                 '# Minibatch-size: {}'.format(args.batchsize),
                 '# epoch: {}'.format(args.epoch),
                 ''):
        print(line)

    # Dataset name -> (announcement, number of classes, loader).
    datasets = {
        'cifar10': ('Using CIFAR10 dataset.', 10, get_cifar10),
        'cifar100': ('Using CIFAR100 dataset.', 100, get_cifar100),
    }
    if args.dataset not in datasets:
        raise RuntimeError('Invalid dataset choice.')
    announcement, class_labels, load_dataset = datasets[args.dataset]
    print(announcement)
    train, test = load_dataset()

    # Quick-test mode: keep only the first 200 examples of each split.
    if args.test:
        train, test = train[:200], test[:200]

    train_count, test_count = len(train), len(test)

    # Classifier wraps the network and exposes ``loss`` and ``accuracy``
    # attributes, which the loop below reads after each forward pass.
    net = models.VGG.VGG(class_labels)
    model = L.Classifier(net)
    if args.gpu >= 0:
        # Make the requested GPU current, then move the model onto it.
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    # Optionally restore model and optimizer state from a previous run.
    snapshot_dir = args.resume
    if snapshot_dir is not None:
        if not os.path.exists(snapshot_dir):
            raise ValueError(
                '`args.resume` ("{}") is specified,'
                ' but it does not exist.'.format(snapshot_dir)
            )
        serializers.load_npz(os.path.join(snapshot_dir, 'vgg.model'), model)
        serializers.load_npz(
            os.path.join(snapshot_dir, 'vgg.state'), optimizer)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    def as_variables(examples):
        # Collate a mini-batch for the selected device and wrap both the
        # inputs and the labels as Variables.
        inputs, labels = convert.concat_examples(examples, args.gpu)
        return chainer.Variable(inputs), chainer.Variable(labels)

    train_loss = 0
    train_acc = 0
    while train_iter.epoch < args.epoch:
        minibatch = train_iter.next()
        # The flag only changes on the next call to ``next()``, so it is
        # read once per iteration.
        epoch_finished = train_iter.is_new_epoch

        # Reduce learning rate by 0.5 every 25 epochs.
        if epoch_finished and train_iter.epoch % 25 == 0:
            optimizer.lr *= 0.5
            print('Reducing learning rate to: {}'.format(optimizer.lr))

        x, t = as_variables(minibatch)
        optimizer.update(model, x, t)
        n_examples = len(t)
        train_loss += float(model.loss.array) * n_examples
        train_acc += float(model.accuracy.array) * n_examples

        if not epoch_finished:
            continue

        print('epoch: {}'.format(train_iter.epoch))
        print('train mean loss: {}, accuracy: {}'.format(
            train_loss / train_count, train_acc / train_count))
        train_loss = 0
        train_acc = 0

        # Evaluate in test mode; disabling backprop is optional but can
        # reduce computational overhead.
        test_loss = 0
        test_acc = 0
        with configuration.using_config('train', False), \
                chainer.using_config('enable_backprop', False):
            for test_batch in test_iter:
                x, t = as_variables(test_batch)
                loss = model(x, t)
                n_examples = len(t)
                test_loss += float(loss.array) * n_examples
                test_acc += float(model.accuracy.array) * n_examples

        # Rewind the non-repeating iterator for the next evaluation pass.
        test_iter.reset()
        print('test mean  loss: {}, accuracy: {}'.format(
            test_loss / test_count, test_acc / test_count))

    # Save the model and the optimizer
    out_dir = args.out
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    print('save the model')
    serializers.save_npz(os.path.join(out_dir, 'vgg.model'), model)
    print('save the optimizer')
    serializers.save_npz(os.path.join(out_dir, 'vgg.state'), optimizer)
示例#53
0
    def test_mnist_simple(self, display_log=True):
        """Train, checkpoint, resume and validate an MNIST model.

        The test runs in four phases:

        1. Train for ``stop`` epochs, saving a checkpoint at every epoch
           boundary.
        2. Build a second, independent set of training objects and load
           the checkpoint into it; the epoch and iteration counters of
           both updaters must match.
        3. Continue training the second model until ``stop * 2`` epochs.
        4. Evaluate the second model on the test iterator and require a
           mean accuracy of at least 0.95.

        Args:
            display_log (bool): When true, progress and checkpoint
                statistics are printed to stdout. When false, the test
                prints nothing.
        """
        updater, optimizer, train_iter, _, model = self.setup_mnist_trainer()

        # NOTE(review): the base directory is hard-coded to /tmp.
        path = tempfile.mkdtemp(dir='/tmp', prefix=__name__ + "-tmp-")
        if display_log:
            print("temporary file:", path)
        checkpointer = create_multi_node_checkpointer(name=__name__,
                                                      comm=self.communicator,
                                                      path=path)
        checkpointer.maybe_load(updater, optimizer)

        sum_accuracy = 0
        sum_loss = 0
        stop = 5
        train_count = len(train_iter.dataset)
        while train_iter.epoch < stop:
            batch = train_iter.next()
            x_array, t_array = convert.concat_examples(batch, -1)
            x = chainer.Variable(x_array)
            t = chainer.Variable(t_array)
            optimizer.update(model, x, t)

            # Weight the batch means by batch size so that dividing by
            # train_count at the epoch boundary yields per-example means.
            sum_loss += float(model.loss.data) * len(t.data)
            sum_accuracy += float(model.accuracy.data) * len(t.data)

            if train_iter.is_new_epoch:
                if display_log:
                    print(updater.iteration, train_iter.epoch,
                          sum_loss / train_count, sum_accuracy / train_count)
                sum_loss = 0
                sum_accuracy = 0

                checkpointer.save(updater, updater.iteration)

        if display_log:
            print(self.communicator.rank, checkpointer.get_stats())

        # Allocate totally different set of training tools to avoid leakage
        data_2 = self.setup_mnist_trainer()
        updater2, optimizer2, train_iter2, test_iter2, model2 = data_2
        checkpointer2 = create_multi_node_checkpointer(
            name=__name__, comm=self.communicator, path=path)
        checkpointer2.maybe_load(updater2, optimizer2)

        # Check data properly resumed
        self.assertEqual(updater.epoch, updater2.epoch)
        self.assertEqual(updater.iteration, updater2.iteration)
        # TODO(kuenishi): find a simple way to assure model equality
        # in terms of float matrix
        # self.assertEqual(model, model2)

        # Restart training
        while train_iter2.epoch < stop * 2:
            batch = train_iter2.next()
            x_array, t_array = convert.concat_examples(batch, -1)
            x = chainer.Variable(x_array)
            t = chainer.Variable(t_array)
            optimizer2.update(model2, x, t)

            sum_loss += float(model2.loss.data) * len(t.data)
            sum_accuracy += float(model2.accuracy.data) * len(t.data)

            if train_iter2.is_new_epoch:
                # Honour display_log here as well, matching the first
                # training loop, so display_log=False is truly silent.
                if display_log:
                    print(updater2.iteration, train_iter2.epoch,
                          sum_loss / train_count, sum_accuracy / train_count)
                sum_loss = 0
                sum_accuracy = 0

                checkpointer2.save(updater2, updater2.iteration)

        if display_log:
            print(self.communicator.rank, checkpointer2.get_stats())
        checkpointer2.finalize()
        checkpointer.finalize()

        # Validate training
        sum_accuracy = 0
        sum_loss = 0
        test_count = len(test_iter2.dataset)
        for batch in test_iter2:
            x_array, t_array = convert.concat_examples(batch, -1)
            x = chainer.Variable(x_array)
            t = chainer.Variable(t_array)
            loss = model2(x, t)
            sum_loss += float(loss.data) * len(t.data)
            sum_accuracy += float(model2.accuracy.data) * len(t.data)

        if display_log:
            print('test mean  loss: {}, accuracy: {}'.format(
                sum_loss / test_count, sum_accuracy / test_count))

        self.assertGreaterEqual(sum_accuracy / test_count, 0.95)
        # os.removedirs only removes empty directories; presumably the
        # finalize() calls above have already deleted the checkpoint
        # files -- confirm against the checkpointer implementation.
        os.removedirs(path)