Example #1
def normalizing_params(data_names):
    # Accumulate per-batch channel means and standard deviations
    # across every dataset in data_names.
    total_mean = []
    total_std = []

    for data_name in data_names:
        data_params = {
            'data_name': data_name,
            'mode': 'train',
            'root_dir': '/mnt/nfs/work1/ds4cg',
            'batch_size': 512,
            'num_workers': 4,
            'label_type': 'binary'
        }
        dataloader = load(**data_params)
        num_batches = len(dataloader['train'])
        for i, (image, label) in enumerate(dataloader['train']):
            numpy_image = image.numpy()

            # Per-channel statistics over the batch, height and width dims (0, 2, 3)
            batch_mean = np.mean(numpy_image, axis=(0, 2, 3))
            batch_std = np.std(numpy_image, axis=(0, 2, 3))

            total_mean.append(batch_mean)
            total_std.append(batch_std)

            log.infov('{}/{}'.format(i, num_batches))
    # Average the per-batch statistics and rescale from [0, 255] to [0, 1].
    mean = np.mean(total_mean, axis=0) / 255.0
    std = np.mean(total_std, axis=0) / 255.0

    log.infov('mean: {}, std: {}'.format(mean, std))
    return mean, std
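
A minimal usage sketch, assuming this module's `load` and `log` helpers are importable; the dataset names are hypothetical placeholders:

mean, std = normalizing_params(['serengeti', 'caltech'])   # hypothetical dataset names
# The returned per-channel values can be passed to
# torchvision.transforms.Normalize(mean=mean, std=std).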
Example #2
def build(model_config, checkpoint):
    if 'name' not in model_config:
        log.error('Specify a model name')
        exit()
    model_name = model_config['name']

    # build model
    if model_name in SUPERVISED_MODELS:
        log.infov('{} model is built'.format(model_name.upper()))
        model = build_supervised_model(model_name, model_config)
    elif model_name in SEMI_MODELS:
        log.infov('{} model is built'.format(model_name.upper()))
        model = build_semi_model(model_name, model_config)
    else:
        # List every registered model name without mutating the registries.
        valid_models = {**SUPERVISED_MODELS, **SEMI_MODELS}
        log.error(
            'Enter valid model name among {}'.format(list(valid_models))
        )
        exit()

    # load model
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model_state_dict'])
        log.infov('Model is built using the given checkpoint')
    else:
        log.infov('Model is built without checkpoint')

    # parallelize model
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)
        log.warn("{} GPUs will be used.".format(torch.cuda.device_count()))

    return model
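
A minimal call sketch, assuming the model registries above are populated and that checkpoints are dicts containing a 'model_state_dict' entry; the model name and file path are hypothetical:

import torch

model_config = {'name': 'resnet18'}                       # hypothetical registry key
checkpoint = torch.load('checkpoints/last.pth', map_location='cpu')
model = build(model_config, checkpoint)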
Example #3
def build(train_config, label_type):
    # set cross entropy as a default
    if 'criterion' not in train_config:
        criterion_config = {'name': 'cross_entropy'}
    else:
        criterion_config = train_config['criterion']
    criterion_name = label_type + '_' + criterion_config.pop('name')

    if criterion_config:
        criterion = CRITERIONS[criterion_name](**criterion_config)
    else:
        criterion = CRITERIONS[criterion_name]()

    log.infov('{} is built'.format(criterion_name))
    return criterion
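
A small usage sketch, assuming `CRITERIONS` contains an entry such as 'binary_cross_entropy'; the config values are illustrative:

train_config = {'criterion': {'name': 'cross_entropy'}}   # hypothetical config
criterion = build(train_config, label_type='binary')
# Resolves CRITERIONS['binary_cross_entropy'] and instantiates it with no extra kwargs.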
Example #4
    def _evaluate_once(self, data_name, is_label_available, num_batches,
                       use_roc):
        if is_label_available:
            total_loss, num_corrects = 0.0, 0
        else:
            prediction_results = []

        self.model.eval()
        for i, (inputs, labels) in enumerate(self.dataloader['eval']):
            inputs = inputs.to(self.device)
            if is_label_available:
                labels = labels.to(self.device).unsqueeze(-1).float()

            # Forward propagation
            outputs = self.model(inputs)

            # Multi-class: take the argmax over the logits;
            # binary: sigmoid probability thresholded at 0.5.
            if self.num_classes > 1:
                _, predictions = torch.max(outputs, 1)
            else:
                probabilities = torch.sigmoid(outputs)
                predictions = torch.gt(probabilities, 0.5).float()
                if use_roc and is_label_available:
                    # TODO: implement save_roc
                    util.save_roc(probabilities, labels)

            # Statistics
            if is_label_available:
                num_corrects += torch.sum((predictions == labels).int())
            else:
                # Without ground-truth labels, the label field carries image ids.
                predictions = predictions.data.cpu().numpy().flatten()
                for image_id, prediction in zip(labels, predictions):
                    prediction_results.append([image_id, int(prediction)])

            log.info('Evaluation batch {}/{}'.format(i + 1, num_batches))

        if is_label_available:
            eval_acc = num_corrects.double() / float(
                len(self.dataloader['eval'].dataset))
            # TODO: add step
            log.infov("Evaluation accuracy: {}".format(eval_acc))
            self.writer.add_scalar('eval accuracy', eval_acc)
        else:
            util.save_results(self.mode, self.model_name, self.tag, data_name,
                              prediction_results)
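
A standalone sketch of the binary-prediction path above (sigmoid plus a 0.5 threshold), with made-up logits and labels:

import torch

logits = torch.tensor([[1.2], [-0.4], [0.1]])        # hypothetical model outputs
probabilities = torch.sigmoid(logits)                 # map logits to (0, 1)
predictions = torch.gt(probabilities, 0.5).float()    # [[1.], [0.], [1.]]
labels = torch.tensor([[1.0], [0.0], [0.0]])
accuracy = (predictions == labels).float().mean()     # 2/3 correct here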
Example #5
    def train(self):
        # Resume from the checkpoint's epoch if one was provided.
        start_epoch = 0 if self.checkpoint is None else self.checkpoint['epoch']
        if not isinstance(start_epoch, int):
            start_epoch = 0
        num_epochs = self.train_config.get('num_epochs', 50)
        # If the checkpoint is already past num_epochs, train for 50 more epochs.
        if num_epochs < start_epoch:
            num_epochs = start_epoch + 50

        log.info("Training for {} epochs starts from epoch {}".format(
            num_epochs, start_epoch))

        val_accuracies = []
        best_acc = 0.0

        for epoch in range(start_epoch, num_epochs):
            train_start = time.time()

            train_loss = self._train_one_epoch()
            self._save_model(epoch)

            # Step the LR scheduler once per epoch, after the optimizer updates
            # (the ordering expected since PyTorch 1.1).
            if self.scheduler is not None:
                self.scheduler.step()

            time_elapsed = time.time() - train_start
            log.info(
                'Epoch {} completed in {:.2f}s - train loss: {:.4f}'\
                .format(epoch, time_elapsed, train_loss)
            )

            val_start = time.time()
            val_loss, val_acc = self.validate()

            # save the best model
            if val_acc > best_acc:
                best_acc = val_acc
                self._save_model(epoch, 'best')

            val_accuracies.append(val_acc)
            time_elapsed = time.time() - val_start

            log.infov(
                'Epoch {} completed in {:.2f}s - val loss: {:.4f}, val accuracy: {:.4f}'\
                .format(epoch, time_elapsed, val_loss, val_acc)
            )
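
A plain-Python sketch of the resume logic at the top of train, using a hypothetical checkpoint dict as saved by _save_model:

checkpoint = {'epoch': 12}                   # hypothetical checkpoint contents
start_epoch = 0 if checkpoint is None else checkpoint['epoch']
num_epochs = 50
if num_epochs < start_epoch:                 # resuming past the configured horizon
    num_epochs = start_epoch + 50
epochs = range(start_epoch, num_epochs)      # training continues at epoch 12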
Example #6
    def evaluate(self):
        image_paths, predictions, labels, label_type = self.predict()
        is_label_available = util.is_label_available(labels)

        if is_label_available:
            num_images = len(labels)
            num_corrects = np.sum(predictions == labels)
            accuracy = num_corrects / float(num_images)
            log.infov('Evaluation accuracy: {}'.format(accuracy))
        else:
            # Derive image ids from file names, e.g. 'dir/abc123.jpg' -> 'abc123'.
            ids = [path.split('/')[-1].split('.')[0] for path in image_paths]
            results = np.stack((ids, predictions), axis=-1)
            results = np.concatenate(([['id', 'animal_present']], results), axis=0)
            output_file = os.path.join(self.eval_config['output_dir'], 'results.csv')
            with open(output_file, 'w', newline='') as f:
                writer = csv.writer(f)
                writer.writerows(results)
            log.infov('Evaluation results are written on {}'.format(output_file))
        return None
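
A minimal sketch of the CSV output path above, with hypothetical ids and predictions:

import csv
import numpy as np

ids = ['img_001', 'img_002']                          # hypothetical image ids
predictions = np.array([1, 0])
results = np.stack((ids, predictions.astype(str)), axis=-1)
results = np.concatenate(([['id', 'animal_present']], results), axis=0)
with open('results.csv', 'w', newline='') as f:
    csv.writer(f).writerows(results)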
Example #7
def build(train_config, optimizer, checkpoint):
    if 'lr_schedule' not in train_config:
        log.infov('No scheduler is specified')
        return None

    schedule_config = train_config['lr_schedule']
    scheduler_name = schedule_config.pop('name', 'step_lr')
    schedule_config['optimizer'] = optimizer

    if scheduler_name in SCHEDULERS:
        scheduler = SCHEDULERS[scheduler_name](**schedule_config)
    else:
        log.error('Specify valid scheduler name among {}'.format(
            SCHEDULERS.keys()))
        exit()

    if checkpoint is not None:
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
    log.infov('{} scheduler is built'.format(scheduler_name.upper()))

    return scheduler
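
A usage sketch, assuming SCHEDULERS maps 'step_lr' to torch.optim.lr_scheduler.StepLR; the schedule values are illustrative:

train_config = {'lr_schedule': {'name': 'step_lr', 'step_size': 10, 'gamma': 0.1}}
scheduler = build(train_config, optimizer, checkpoint=None)   # optimizer built beforehand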
Example #8
def build(train_config, model_params, checkpoint):
    if 'optimizer' not in train_config:
        log.error('Specify an optimizer')
        exit()

    optim_config = train_config['optimizer']
    optimizer_name = optim_config.pop('name', 'sgd')
    optim_config['params'] = model_params

    if optimizer_name in OPTIMIZERS:
        optimizer = OPTIMIZERS[optimizer_name](**optim_config)
    else:
        log.error('Specify valid optimizer name among {}'.format(
            OPTIMIZERS.keys()))
        exit()

    if checkpoint is not None:
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    log.infov('{} optimizer is built'.format(optimizer_name.upper()))

    return optimizer
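
A companion sketch for the optimizer builder, assuming OPTIMIZERS maps 'sgd' to torch.optim.SGD; the hyperparameters are illustrative:

train_config = {'optimizer': {'name': 'sgd', 'lr': 0.01, 'momentum': 0.9}}
optimizer = build(train_config, model.parameters(), checkpoint=None)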
Example #9
def _batch(image_paths, labels, total_num, batch_size):
    # Yield (image_paths, labels) chunks of at most batch_size items.
    num_batches = (total_num + batch_size - 1) // batch_size  # ceiling division
    for idx in range(0, total_num, batch_size):
        log.infov('Processing batch : {}/{}'.format(
            idx // batch_size + 1, num_batches))
        yield (image_paths[idx:min(idx + batch_size, total_num)],
               labels[idx:min(idx + batch_size, total_num)])
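
A short usage sketch for the generator above, with made-up paths and labels:

image_paths = ['a.jpg', 'b.jpg', 'c.jpg', 'd.jpg', 'e.jpg']
labels = [1, 0, 1, 1, 0]
for paths, batch_labels in _batch(image_paths, labels,
                                  total_num=len(image_paths), batch_size=2):
    print(paths, batch_labels)   # three batches of 2, 2 and 1 items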