Example #1
    def multiprocess_generator(self, max_queue_size=32, num_processes=8):
        # Re-shuffle file list
        if self.shuffle and cfg.NUM_TRAINERS > 1:
            np.random.RandomState(self.shuffle_seed).shuffle(self.all_lines)
            num_lines = len(self.all_lines) // cfg.NUM_TRAINERS
            self.lines = self.all_lines[num_lines * cfg.TRAINER_ID:num_lines *
                                        (cfg.TRAINER_ID + 1)]
            self.shuffle_seed += 1
        elif self.shuffle:
            np.random.shuffle(self.lines)

        # Create one sharding generator per worker process
        generators = []
        for pid in range(num_processes):
            generators.append(self.sharding_generator(pid, num_processes))

        enqueuer = None
        try:
            enqueuer = GeneratorEnqueuer(generators)
            enqueuer.start(max_queue_size=max_queue_size, workers=num_processes)
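            # Poll the queue until a batch arrives; a None result means the workers have finished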
            while True:
                generator_out = None
                while enqueuer.is_running():
                    if not enqueuer.queue.empty():
                        generator_out = enqueuer.queue.get(timeout=5)
                        break
                    else:
                        time.sleep(0.01)
                if generator_out is None:
                    break
                yield generator_out
        finally:
            if enqueuer is not None:
                enqueuer.stop()
Example #2
 def reader():
     cnt = 0
     enqueuer = None
     try:
         enqueuer = GeneratorEnqueuer(
             infinite_reader(), use_multiprocessing=use_multiprocess_reader)
         enqueuer.start(max_queue_size=max_queue, workers=num_workers)
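         # Wait for the next batch to show up in the queue, then yield it to the caller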
         generator_out = None
         while True:
             while enqueuer.is_running():
                 if not enqueuer.queue.empty():
                     generator_out = enqueuer.queue.get()
                     break
                 else:
                     time.sleep(0.02)
             yield generator_out
             cnt += 1
             if cnt >= total_iter:
                 enqueuer.stop()
                 return
             generator_out = None
     except Exception as e:
         print("Exception occured in reader: {}".format(str(e)))
     finally:
         if enqueuer:
             enqueuer.stop()
Example #3
 def reader():
     enqueuer = None
     try:
         enqueuer = GeneratorEnqueuer(
             batches, use_multiprocessing=use_multiprocessing)
         enqueuer.start(max_queue_size=max_queue, workers=num_workers)
         generator_out = None
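         # Pull exactly total_step batches from the background queue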
         for i in range(total_step):
             while enqueuer.is_running():
                 if not enqueuer.queue.empty():
                     generator_out = enqueuer.queue.get()
                     break
                 else:
                     time.sleep(0.02)
             yield generator_out
             generator_out = None
         enqueuer.stop()
     finally:
         if enqueuer is not None:
             enqueuer.stop()
Example #4
 def reader():
     cnt = 0
     enqueuer = None
     try:
         enqueuer = GeneratorEnqueuer(
             infinite_reader(), use_multiprocessing=use_multiprocess_reader)
         enqueuer.start(max_queue_size=max_queue, workers=num_workers)
         generator_out = None
         while True:
             while enqueuer.is_running():
                 if not enqueuer.queue.empty():
                     generator_out = enqueuer.queue.get()
                     break
                 else:
                     time.sleep(0.02)
             yield generator_out
             cnt += 1
             if cnt >= total_iter:
                 enqueuer.stop()
                 return
             generator_out = None
     finally:
         if enqueuer is not None:
             enqueuer.stop()
Example #5
def train(net, args):
    current_dir = os.path.dirname(os.path.realpath(__file__))
    if not os.path.exists(os.path.join(current_dir, 'results/')):
        os.mkdir(os.path.join(current_dir, 'results/'))
    save_path = 'results/%s/' % args.dataset
    if not os.path.exists(os.path.join(current_dir, save_path)):
        os.mkdir(os.path.join(current_dir, save_path))
    save_path += '%s/' % args.model_name
    if not os.path.exists(os.path.join(current_dir, save_path)):
        os.mkdir(os.path.join(current_dir, save_path))
    logger = Logger(save_path + 'logs/')

    train_file_path, val_file_path, data_dir, label_dir = get_dataset_path(args.dataset)
    classes = get_dataset_classes(args.dataset)
    transformer = DataTransformer(ch_mean=args.ch_mean, ch_std=args.ch_std, resize_size=args.resize_size,
                 pad_size=args.pad_size, crop_mode=args.crop_mode, crop_size=args.crop_size,
                 zoom_range=[0.5, 2.0], horizontal_flip=True, color_jittering_range=20.,
                 fill_mode='constant', cval=0., label_cval=255, data_format='channels_first',
                 color_format='RGB', x_dtype=np.float32)
    dataloader = VOC12(data_list_file=train_file_path, data_source_dir=data_dir,
                       label_source_dir=label_dir, data_transformer=transformer,
                       batch_size=args.batch_size, shuffle=True)

    ctx = [gpu(i) for i in args.gpus]
    net = net(classes)
    net.collect_params().initialize(ctx=ctx)
    net.load_base_model(ctx)
    #net.hybridize()
    #print(net)

    num_sample = dataloader.get_num_sample()
    num_steps = num_sample//args.batch_size
    if num_sample % args.batch_size > 0:
        num_steps += 1

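    # Prefetch batches from the dataloader with background workers, overlapping data loading with training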
    enqueuer = GeneratorEnqueuer(generator=dataloader)
    enqueuer.start(workers=args.workers, max_queue_size=args.max_queue_size)
    output_generator = enqueuer.get()

    trainer = gluon.Trainer(net.collect_params(), 'nag',
                            {'momentum': 0.9, 'wd': 0.0001,
                            'learning_rate': args.base_lr,
                            'lr_scheduler': PolyScheduler(args.base_lr, args.lr_power, num_steps*args.epochs)})
    loss = MySoftmaxCrossEntropyLoss(axis=1, ignore_label=255)
    metrics = [AccuracyWithIgnoredLabel(axis=1, ignore_label=255)]

    for epoch in range(args.epochs):
        print('training epoch %d/%d:'%(epoch+1, args.epochs))
        for m in metrics:
            m.reset()
        train_loss = 0.
        train_acc = 0.
        for i in range(num_steps):
            batch_x, batch_y = next(output_generator)

            batch_x = mx.nd.array(batch_x)
            batch_y = mx.nd.array(batch_y)

            losses = train_batch(batch_x, batch_y, ctx, net, trainer, loss, metrics)

            train_loss += mx.nd.mean(mx.nd.add_n(*losses)).asscalar()/len(args.gpus)
            info = 'loss: %.3f' % (train_loss/(i+1))
            for m in metrics:
                name, value = m.get()
                info += ' | %s: %.3f'%(name, value)
            progress_bar(i, num_steps, info)
        # write logs for this epoch
        logger.scalar_summary('loss', train_loss/num_steps, epoch)
        for m in metrics:
            name, value = m.get()
            logger.scalar_summary(name, value, epoch)
        mx.nd.waitall()
        net.save_params(save_path+'checkpoint.params')

    enqueuer.stop()
Example #6
                                  color_jittering_range=20.,
                                  fill_mode='constant',
                                  cval=0.,
                                  label_cval=255,
                                  data_format='channels_first',
                                  color_format='RGB',
                                  x_dtype=np.float32)
    data_dir = '/home/aurora/Learning/Data/VOC2012/JPEGImages'
    label_dir = '/home/aurora/Learning/Data/VOC2012/SegmentationClass'
    val_file_path = '/home/aurora/Learning/Data/VOC2012/ImageSets/Segmentation/val.txt'
    dataloader = VOC12(data_list_file=val_file_path,
                       data_source_dir=data_dir,
                       label_source_dir=label_dir,
                       data_transformer=transformer,
                       batch_size=1,
                       shuffle=True)

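    # A single background worker suffices here; only one validation batch is pulled below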
    enqueuer = GeneratorEnqueuer(generator=dataloader)
    enqueuer.start(workers=1, max_queue_size=10)
    output_generator = enqueuer.get()

    x, y = next(output_generator)
    img_y = Image.open(os.path.join(label_dir, '2007_000033.png'))
    result_x = array_to_img(x[0], 'channels_first')
    result_y = Image.fromarray(y[0, 0, :, :], mode='P')
    result_y.putpalette(img_y.getpalette())
    result_x.show(title='result_x', command=None)
    result_y.show(title='result_y', command=None)

    enqueuer.stop()
Example #7
def train(net, args):
    current_dir = os.path.dirname(os.path.realpath(__file__))
    if not os.path.exists(os.path.join(current_dir, 'results/')):
        os.mkdir(os.path.join(current_dir, 'results/'))
    save_path = 'results/%s/' % args.dataset
    if not os.path.exists(os.path.join(current_dir, save_path)):
        os.mkdir(os.path.join(current_dir, save_path))
    save_path += '%s/' % args.model_name
    if not os.path.exists(os.path.join(current_dir, save_path)):
        os.mkdir(os.path.join(current_dir, save_path))
    logger = Logger(save_path + 'logs/')

    train_file_path, val_file_path, data_dir, label_dir = get_dataset_path(
        args.dataset)
    classes = get_dataset_classes(args.dataset)
    transformer = DataTransformer(ch_mean=args.ch_mean,
                                  ch_std=args.ch_std,
                                  resize_size=args.resize_size,
                                  pad_size=args.pad_size,
                                  crop_mode=args.crop_mode,
                                  crop_size=args.crop_size,
                                  zoom_range=[0.5, 2.0],
                                  horizontal_flip=True,
                                  color_jittering_range=20.,
                                  fill_mode='constant',
                                  cval=0.,
                                  label_cval=255,
                                  data_format='channels_first',
                                  color_format='RGB',
                                  x_dtype=np.float32)
    dataloader = VOC12(data_list_file=train_file_path,
                       data_source_dir=data_dir,
                       label_source_dir=label_dir,
                       data_transformer=transformer,
                       batch_size=args.batch_size,
                       shuffle=True)

    num_sample = dataloader.get_num_sample()
    num_steps = num_sample // args.batch_size
    if num_sample % args.batch_size > 0:
        num_steps += 1

    enqueuer = GeneratorEnqueuer(generator=dataloader)
    enqueuer.start(workers=args.workers, max_queue_size=args.max_queue_size)
    output_generator = enqueuer.get()

    net = net(classes)
    net.cuda()
    net = torch.nn.DataParallel(net, device_ids=args.gpus)
    cudnn.benchmark = True
    optimizer = torch.optim.SGD(net.parameters(),
                                lr=args.base_lr,
                                momentum=0.9,
                                weight_decay=args.weight_decay,
                                nesterov=True)
    scheduler = get_polyscheduler(optimizer, args.lr_power, args.epochs)
    loss_functions = [nn.CrossEntropyLoss(ignore_index=255)]
    metric_functions = [SparseAccuracy(ignore_label=255, name='Acc')]

    for epoch in range(args.epochs):
        scheduler.step()
        print('training epoch %d/%d, lr=%.4f:' %
              (epoch + 1, args.epochs,
               optimizer.state_dict()['param_groups'][0]['lr']))
        for m in metric_functions:
            m.reset()
        train_loss = 0.
        for i in range(num_steps):
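            # Grab the next prefetched batch and move it onto the GPU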
            batch_x, batch_y = next(output_generator)
            batch_x, batch_y = torch.Tensor(batch_x).cuda(), torch.LongTensor(
                np.squeeze(batch_y).astype(int)).cuda()
            losses = train_batch(batch_x, batch_y, net, optimizer,
                                 loss_functions, metric_functions)
            info = ''
            train_loss += sum([loss.cpu().data.numpy()[0] for loss in losses])
            info += '| loss: %.3f' % (train_loss / (i + 1))
            for m in metric_functions:
                name, value = m.get()
                info += ' | %s: %.3f' % (name, value)
            progress_bar(i, num_steps, info)
        # write logs for this epoch
        logger.scalar_summary('loss', train_loss / num_steps, epoch)
        for m in metric_functions:
            name, value = m.get()
            logger.scalar_summary(name, value, epoch)
        torch.save(net.state_dict(), save_path + 'checkpoint.params')
    enqueuer.stop()