Exemplo n.º 1
0
    def run(self, data_gen, training=True):
        """Iterate once over ``data_gen`` and average loss/accuracy.

        Processes every batch via ``self.process_batch``, logs per-step
        metrics when a logger is attached, and advances ``self.step``
        only while training.

        Returns:
            dict with epoch-averaged 'loss' and 'accu'.
        """
        stage = 'train' if training else 'valid'
        n_steps = len(data_gen)
        total_loss = 0.0
        total_accu = 0.0

        progress_bar = tqdm(
            enumerate(data_gen),
            total=n_steps,
            ncols=get_tty_columns(),
            dynamic_ncols=True,
            desc='[%s] Loss: %.5f, Accu: %.5f' % (stage, 0.0, 0.0),
        )

        for step, batch in progress_bar:
            loss, accu = self.process_batch(batch, training=training)

            # TODO: multiple classes
            accu = accu.mean()

            # hoist the tensor->float conversions; each value is reused below
            loss_val = loss.item()
            accu_val = accu.item()

            progress_bar.set_description(
                '[%s] Loss: %.5f, Avg accu: %.5f' % (stage, loss_val, accu_val))

            total_loss += loss_val
            total_accu += accu_val

            if self.logger is not None:
                self.logger.add_scalar('%s/metrics/step_loss' % stage,
                                       loss_val, self.step + 1)
                self.logger.add_scalar('%s/metrics/step_accu' % stage,
                                       accu_val, self.step + 1)
            if training:
                self.step += 1

        return {
            'loss': total_loss / n_steps,
            'accu': total_accu / n_steps,
        }
Exemplo n.º 2
0
# Build one frozen, eval-mode ModelHandler per configured module.
model_handlers = dict()
for key, cfg in config['module'].items():
    # restore from a checkpoint only when one is configured
    model_handlers[key] = ModelHandler(cfg['config'],
                                       checkpoint=cfg.get('ckpt'))
    model_handlers[key].model.eval()

    # toggle off all trainable parameters of each module
    for param in model_handlers[key].model.parameters():
        param.requires_grad = False

progress_bar = tqdm(data_gen,
                    total=len(data_gen),
                    ncols=get_tty_columns(),
                    dynamic_ncols=True,
                    desc='[Inferring]')


def batch_gen(source_gen):
    """Yield batches from ``source_gen``, moved to GPU when available.

    Each batch must be a dict; values are sent to CUDA if at least one
    device is present, otherwise passed through unchanged.
    """
    for batch in source_gen:
        assert isinstance(batch, dict)
        data = dict()

        for key in batch:
            if torch.cuda.device_count() >= 1:
                data[key] = batch[key].cuda()
            else:
                data[key] = batch[key]

        # BUG FIX: the original built `data` but never yielded it, so the
        # function produced nothing at all.
        yield data
Exemplo n.º 3
0
    def run(self, stage):
        """Run every task configured for ``stage`` over its data flow(s).

        Builds the data flow (a single generator, or several tagged
        generators zipped together), executes each configured task on
        every batch, logs per-step metrics, optionally reverts
        predictions to score each case, and returns a summary dict
        keyed by task name.
        """
        stage_config = self.config['stage'][stage]

        # build data flow from the given data generator
        # single data flow
        if isinstance(stage_config['generator'], str):
            data_gen = self.generators[stage_config['generator']]
            class_names = data_gen.struct['DL'].ROIs
            n_steps = len(data_gen)
            gen_tags = None

        # multiple data flows
        elif isinstance(stage_config['generator'], dict):
            gens = [
                self.generators[cfg]
                for cfg in stage_config['generator'].values()
            ]
            data_gen = zip(*gens)
            class_names = gens[0].struct['DL'].ROIs
            # the zipped flow stops at the shortest generator
            n_steps = min(len(g) for g in gens)
            gen_tags = list(stage_config['generator'].keys())

        else:
            raise TypeError('generator of type %s is not supported.' %
                            type(stage_config['generator']))

        progress_bar = tqdm(data_gen,
                            total=n_steps,
                            ncols=get_tty_columns(),
                            dynamic_ncols=True,
                            desc='[%s] loss: %.5f, accu: %.5f' %
                            (stage, 0.0, 0.0))

        if stage not in self.step:
            self.step[stage] = 1

        task_result_list = {
            task_name: []
            for task_name in stage_config['task']
        }
        need_revert = stage_config.get('revert', False)
        for batch in progress_bar:

            # format the batch into {tag: {key: data}}
            if gen_tags is None:
                assert isinstance(batch, dict)
                formatted_batch = {NO_TAG: dict()}
                for key in batch:
                    if torch.cuda.device_count() >= 1:
                        formatted_batch[NO_TAG][key] = batch[key].cuda()
                    else:
                        formatted_batch[NO_TAG][key] = batch[key]
            else:
                formatted_batch = dict()
                for (tag, tag_batch) in zip(gen_tags, batch):
                    tag_data = dict()
                    for key in tag_batch:
                        if torch.cuda.device_count() >= 1:
                            tag_data[key] = tag_batch[key].cuda()
                        else:
                            tag_data[key] = tag_batch[key]
                    formatted_batch[tag] = tag_data

            # execute each task in this stage
            for task_name in stage_config['task']:
                task_config = self.tasks[task_name]

                # skip the task periodically
                if 'period' in task_config \
                    and self.step[stage] % task_config['period'] != 0:
                    continue

                # modify the status of modules if the running task changed
                if self.running_task != task_name:

                    # toggle trainable parameters of each module
                    for (key, toggle) in task_config['toggle'].items():
                        self.handlers[key].model.train(toggle)
                        for param in self.handlers[key].model.parameters():
                            param.requires_grad = toggle

                    self.running_task = task_name

                task_result = self.run_task(
                    task_config,
                    formatted_batch,
                    need_revert=need_revert,
                )

                # detach all the results, move them to CPU, and convert them to numpy
                for key in task_result:
                    task_result[key] = task_result[key].detach().cpu().numpy()

                # average accuracy if multi-dim
                assert 'accu' in task_result
                if task_result['accu'].ndim == 0:
                    # BUG FIX: the original compared `== math.nan`, which is
                    # always False; a scalar accuracy is used as-is (NaN
                    # simply propagates).
                    step_accu = task_result['accu']
                else:
                    assert task_result['accu'].ndim == 1
                    # NaN-mean over classes, or NaN when every class is NaN
                    step_accu = (math.nan
                                 if np.isnan(task_result['accu']).all()
                                 else np.nanmean(task_result['accu']))

                assert 'loss' in task_result
                progress_bar.set_description(
                    '[%s][%s] loss: %.5f, accu: %.5f' %
                    (stage, task_name, task_result['loss'], step_accu))

                if self.logger is not None:
                    self.logger.add_scalar(
                        '%s/%s/step/loss' % (stage, task_name),
                        task_result['loss'], self.step[stage])
                    self.logger.add_scalar(
                        '%s/%s/step/accu' % (stage, task_name),
                        -1 if math.isnan(step_accu) else step_accu,
                        self.step[stage])

                task_result_list[task_name].append(task_result)

            self.step[stage] += 1

        # summarize the result list
        task_summary = dict()
        for (task_name, result_list) in task_result_list.items():

            if len(result_list) == 0:
                continue

            summary = dict()

            if need_revert:
                # NOTE(review): reversion assumes data_gen is the single-flow
                # generator (zip objects are exhausted by now) — confirm.
                reverter = Reverter(data_gen)
                result_collection_blacklist = reverter.revertible

                scores = dict()
                progress_bar = tqdm(reverter.on_batches(result_list),
                                    total=len(reverter.data_list),
                                    dynamic_ncols=True,
                                    ncols=get_tty_columns(),
                                    desc='[Data index]')
                for reverted in progress_bar:
                    data_idx = reverted['idx']
                    scores[data_idx] = data_gen.struct['DL'].evaluate(
                        data_idx, reverted['prediction'])
                    info = '[%s] mean score: %.3f' % (
                        data_idx, np.mean(list(scores[data_idx].values())))
                    progress_bar.set_description(info)

                # summarize score of each class over data indices
                cls_scores = {
                    cls:
                    np.mean([scores[data_idx][cls] for data_idx in scores])
                    for cls in class_names
                }

                summary['scores'] = scores
                summary['cls_scores'] = cls_scores
                summary['cls_mean'] = np.mean(list(cls_scores.values()))

            else:
                result_collection_blacklist = []

            # collect results except those revertible ones, e.g., collect accu, loss
            summary.update({
                key:
                np.nanmean(np.vstack([result[key] for result in result_list]),
                           axis=0)
                for key in result_list[0].keys()
                if key not in result_collection_blacklist
            })

            # process 1D array accu to dictionary of each class score
            if len(summary['accu']) > 1:
                assert len(summary['accu']) == len(class_names), (len(
                    summary['accu']), len(class_names))
                summary['cls_accu'] = {
                    cls: summary['accu'][i]
                    for (i, cls) in enumerate(class_names)
                }
                summary['accu'] = summary['accu'].mean()

            # print summary info
            print('[%s][%s] Average: ' % (stage, task_name) + ', '.join([
                '%s: %.3f' % (key, val)
                for (key, val) in summary.items() if not isinstance(val, dict)
            ]))

            if 'cls_scores' in summary:
                print('Class score: ' + ', '.join([
                    '%s: %.3f' % (key, val)
                    for (key, val) in summary['cls_scores'].items()
                ]))
                print('Class mean: %.3f' % summary['cls_mean'])

            task_summary[task_name] = summary

        return task_summary
Exemplo n.º 4
0
    def run_invertible(self, stage):
        """Run the single task of ``stage`` while reverting predictions lazily.

        The reverter consumes per-batch results from ``invertible_gen`` as
        they are produced, so per-case reversion and scoring overlap with
        batch processing instead of waiting for the whole epoch.

        Returns:
            {task_name: summary} where summary holds per-case 'scores',
            per-class 'cls_scores', their 'cls_mean', and the averaged
            non-revertible step metrics (e.g. accu, loss).
        """

        stage_config = self.config['stage'][stage]
        # this streaming path supports exactly one task per stage
        assert len(stage_config['task']) == 1

        if stage not in self.step:
            self.step[stage] = 1

        task_name = stage_config['task'][0]
        task_config = self.tasks[task_name]

        (data_gen, n_steps, gen_tags, class_names) = self.get_data_gen(stage)
        inv_gen = self.invertible_gen(stage, data_gen, n_steps, gen_tags,
                                      task_name, task_config)
        reverter = Reverter(data_gen)

        summary = dict()

        # revertible keys are reconstructed per case below, so they are
        # excluded from the plain step-wise averaging at the end
        result_collection_blacklist = reverter.revertible

        # We use a mutable result list to collect the results during the validation
        result_list = []

        scores = dict()
        # NOTE(review): on_future_batches appears to drain inv_gen and append
        # each step result into result_list as a side effect — confirm
        # against the Reverter implementation.
        progress_bar = tqdm(reverter.on_future_batches(
            inv_gen,
            mutable_results=result_list,
        ),
                            total=len(reverter.data_list),
                            dynamic_ncols=True,
                            ncols=get_tty_columns(),
                            desc='[Data index]')
        for reverted in progress_bar:
            data_idx = reverted['idx']
            # score the reverted prediction against the original case
            scores[data_idx] = data_gen.struct['DL'].evaluate(
                data_idx, reverted['prediction'])
            info = '[%s] mean score: %.3f' % (
                data_idx, np.mean(list(scores[data_idx].values())))
            progress_bar.set_description(info)

        # summarize score of each class over data indices
        cls_scores = {
            cls: np.mean([scores[data_idx][cls] for data_idx in scores])
            for cls in class_names
        }

        summary['scores'] = scores
        summary['cls_scores'] = cls_scores
        summary['cls_mean'] = np.mean(list(cls_scores.values()))

        # collect results except those revertible ones, e.g., collect accu, loss
        summary.update({
            key: np.nanmean(np.vstack([result[key] for result in result_list]),
                            axis=0)
            for key in result_list[0].keys()
            if key not in result_collection_blacklist
        })

        # process 1D array accu to dictionary of each class score
        if len(summary['accu']) > 1:
            assert len(summary['accu']) == len(class_names), (len(
                summary['accu']), len(class_names))
            summary['cls_accu'] = {
                cls: summary['accu'][i]
                for (i, cls) in enumerate(class_names)
            }
            summary['accu'] = summary['accu'].mean()

        # print summary info
        print('[%s][%s] Average: ' % (stage, task_name) + ', '.join([
            '%s: %.3f' % (key, val)
            for (key, val) in summary.items() if not isinstance(val, dict)
        ]))

        if 'cls_scores' in summary:
            print('Class score: ' + ', '.join([
                '%s: %.3f' % (key, val)
                for (key, val) in summary['cls_scores'].items()
            ]))
            print('Class mean: %.3f' % summary['cls_mean'])

        return {task_name: summary}
Exemplo n.º 5
0
    def invertible_gen(self, stage, data_gen, n_steps, gen_tags, task_name,
                       task_config):
        """Yield one numpy result dict per batch for the given task.

        Moves each batch to GPU when available, toggles module
        trainability once if the running task changed, runs the task with
        ``need_revert=True``, converts results to numpy, and logs step
        metrics before yielding each result.
        """

        progress_bar = tqdm(data_gen,
                            total=n_steps,
                            ncols=get_tty_columns(),
                            dynamic_ncols=True,
                            desc='[%s] loss: %.5f, accu: %.5f' %
                            (stage, 0.0, 0.0))

        # modify the status of modules if the running task changed
        if self.running_task != task_name:

            # toggle trainable parameters of each module
            for (key, toggle) in task_config['toggle'].items():
                self.handlers[key].model.train(toggle)
                for param in self.handlers[key].model.parameters():
                    param.requires_grad = toggle

            self.running_task = task_name

        for batch in progress_bar:

            # format the batch into {tag: {key: data}}
            if gen_tags is None:
                assert isinstance(batch, dict)
                formatted_batch = {NO_TAG: dict()}
                for key in batch:
                    if torch.cuda.device_count() >= 1:
                        formatted_batch[NO_TAG][key] = batch[key].cuda()
                    else:
                        formatted_batch[NO_TAG][key] = batch[key]
            else:
                formatted_batch = dict()
                for (tag, tag_batch) in zip(gen_tags, batch):
                    tag_data = dict()
                    for key in tag_batch:
                        if torch.cuda.device_count() >= 1:
                            tag_data[key] = tag_batch[key].cuda()
                        else:
                            tag_data[key] = tag_batch[key]
                    formatted_batch[tag] = tag_data

            task_result = self.run_task(
                task_config,
                formatted_batch,
                need_revert=True,
            )

            # detach all the results, move them to CPU, and convert them to numpy
            for key in task_result:
                task_result[key] = task_result[key].detach().cpu().numpy()

            # average accuracy if multi-dim
            assert 'accu' in task_result
            if task_result['accu'].ndim == 0:
                # BUG FIX: the original compared `== math.nan`, which is
                # always False; a scalar accuracy is used as-is (NaN
                # simply propagates).
                step_accu = task_result['accu']
            else:
                assert task_result['accu'].ndim == 1
                # NaN-mean over classes, or NaN when every class is NaN
                step_accu = (math.nan
                             if np.isnan(task_result['accu']).all()
                             else np.nanmean(task_result['accu']))

            assert 'loss' in task_result
            progress_bar.set_description(
                '[%s][%s] loss: %.5f, accu: %.5f' %
                (stage, task_name, task_result['loss'], step_accu))

            if self.logger is not None:
                self.logger.add_scalar('%s/%s/step/loss' % (stage, task_name),
                                       task_result['loss'], self.step[stage])
                self.logger.add_scalar(
                    '%s/%s/step/accu' % (stage, task_name),
                    -1 if math.isnan(step_accu) else step_accu,
                    self.step[stage])

            self.step[stage] += 1
            yield task_result
Exemplo n.º 6
0
    def run(self, stage):
        """Run one epoch of ``stage``: forward pass, metrics, optional
        backpropagation, and optional per-case reversion/scoring.

        Returns:
            summary dict of epoch-averaged metrics, plus per-case
            'scores' and per-class 'cls_scores' when reversion is on.
        """
        stage_config = self.config['stage'][stage]

        # build data flow from the given data generator
        # single data flow
        if isinstance(stage_config['generator'], str):
            data_gen = self.generators[stage_config['generator']]
            class_names = data_gen.struct['DL'].ROIs
            n_steps = len(data_gen)
            gen_tags = None

        # multiple data flows
        elif isinstance(stage_config['generator'], dict):
            gens = [
                self.generators[cfg]
                for cfg in stage_config['generator'].values()
            ]
            data_gen = zip(*gens)
            class_names = gens[0].struct['DL'].ROIs
            # the zipped flow stops at the shortest generator
            n_steps = min(len(g) for g in gens)
            gen_tags = list(stage_config['generator'].keys())

            # the forward config should match the multiple data flows
            assert isinstance(stage_config['forward'], dict)
            assert gen_tags == list(stage_config['forward'].keys())

        else:
            raise TypeError('generator of type %s is not supported.' %
                            type(stage_config['generator']))

        progress_bar = tqdm(data_gen,
                            total=n_steps,
                            ncols=get_tty_columns(),
                            dynamic_ncols=True,
                            desc='[%s] loss: %.5f, accu: %.5f' %
                            (stage, 0.0, 0.0))

        if stage not in self.step:
            self.step[stage] = 1

        # toggle trainable parameters of each module; backprop is needed
        # as soon as any module is trainable
        need_backward = False
        for key, toggle in stage_config['toggle'].items():
            self.handlers[key].model.train(toggle)
            for param in self.handlers[key].model.parameters():
                param.requires_grad = toggle
            if toggle:
                need_backward = True

        result_list = []
        need_revert = stage_config.get('revert', False)
        for batch in progress_bar:

            self.step[stage] += 1

            # single data flow
            if gen_tags is None:
                assert isinstance(batch, dict)

                # insert batch to data
                data = dict()
                for key in batch:
                    if torch.cuda.device_count() >= 1:
                        data[key] = batch[key].cuda()
                    else:
                        data[key] = batch[key]

                # forward
                for key in stage_config['forward']:
                    data.update(self.handlers[key].model(data))

            # multiple data flows
            else:
                assert isinstance(batch, tuple)
                data = dict()
                for (tag, tag_batch) in zip(gen_tags, batch):
                    tag_data = dict()

                    # insert batch to data
                    for key in tag_batch:
                        if torch.cuda.device_count() >= 1:
                            tag_data[key] = tag_batch[key].cuda()
                        else:
                            tag_data[key] = tag_batch[key]

                    # forward
                    for key in stage_config['forward'][tag]:
                        tag_data.update(self.handlers[key].model(tag_data))

                    # insert tag data back to the data, namespaced by tag
                    data.update({
                        '%s_%s' % (key, tag): tag_data[key]
                        for key in tag_data
                    })

            # compute loss and accuracy
            results = self.metrics[stage_config['metric']](data)

            # backpropagation
            if need_backward:
                results['loss'].backward()
                for key, toggle in stage_config['toggle'].items():
                    if toggle:
                        self.optims[key].step()
                        self.optims[key].zero_grad()

            # compute match for dice score of each case after reversion
            if need_revert:
                assert 'prediction' in data, list(data.keys())
                assert 'label' in data, list(data.keys())
                with torch.set_grad_enabled(False):
                    match, total = match_up(
                        data['prediction'],
                        data['label'],
                        needs_softmax=True,
                        batch_wise=True,
                        threshold=-1,
                    )
                    results.update({'match': match, 'total': total})

            # detach all results, move to CPU, and convert to numpy
            for key in results:
                results[key] = results[key].detach().cpu().numpy()

            # average accuracy if multi-dim
            assert 'accu' in results
            if results['accu'].ndim == 0:
                # BUG FIX: the original compared `== math.nan`, which is
                # always False; a scalar accuracy is used as-is (NaN
                # simply propagates).
                step_accu = results['accu']
            else:
                assert results['accu'].ndim == 1
                # NaN-mean over classes, or NaN when every class is NaN
                step_accu = (math.nan if np.isnan(results['accu']).all()
                             else np.nanmean(results['accu']))

            assert 'loss' in results
            progress_bar.set_description('[%s] loss: %.5f, accu: %.5f' %
                                         (stage, results['loss'], step_accu))

            if self.logger is not None:
                self.logger.add_scalar('%s/step/loss' % stage, results['loss'],
                                       self.step[stage])
                self.logger.add_scalar(
                    '%s/step/accu' % stage,
                    -1 if math.isnan(step_accu) else step_accu,
                    self.step[stage])

            result_list.append(results)

        summary = dict()
        if need_revert:
            reverter = Reverter(data_gen)
            result_collection_blacklist = reverter.revertible

            scores = dict()
            progress_bar = tqdm(reverter.on_batches(result_list),
                                total=len(reverter.data_list),
                                dynamic_ncols=True,
                                ncols=get_tty_columns(),
                                desc='[Data index]')
            for reverted in progress_bar:
                data_idx = reverted['idx']
                scores[data_idx] = reverted['score']
                info = '[%s] mean score: %.3f' % (
                    data_idx, np.mean(list(scores[data_idx].values())))
                progress_bar.set_description(info)

            # summarize score of each class over data indices
            cls_scores = {
                cls: np.mean([scores[data_idx][cls] for data_idx in scores])
                for cls in class_names
            }
            cls_scores.update(
                {'mean': np.mean([cls_scores[cls] for cls in class_names])})

            summary['scores'] = scores
            summary['cls_scores'] = cls_scores

        else:
            result_collection_blacklist = []

        # collect results except those revertible ones, e.g., accu, loss
        summary.update({
            key: np.nanmean(np.vstack([result[key] for result in result_list]),
                            axis=0)
            for key in result_list[0].keys()
            if key not in result_collection_blacklist
        })

        # process 1D array accu to dictionary of each class score
        if len(summary['accu']) > 1:
            assert len(summary['accu']) == len(class_names), (len(
                summary['accu']), len(class_names))
            summary['cls_accu'] = {
                cls: summary['accu'][i]
                for (i, cls) in enumerate(class_names)
            }
            summary['accu'] = summary['accu'].mean()

        # print summary info
        print('Average: ' + ', '.join([
            '%s: %.3f' % (key, val)
            for (key, val) in summary.items() if not isinstance(val, dict)
        ]))

        if 'cls_scores' in summary:
            print('Class score: ' + ', '.join([
                '%s: %.3f' % (key, val)
                for (key, val) in summary['cls_scores'].items()
            ]))

        return summary
Exemplo n.º 7
0
    def run(
        self,
        data_gen,
        training=True,
        stage=None,
        min_ratio=0.,
        **kwargs,
    ):
        """Run one pass over ``data_gen`` and collect per-step results.

        Args:
            data_gen: sized iterable of batch dicts of tensors.
            training: use ``learner.learn`` when True, else ``learner.infer``.
            stage: tag used for logging; derived from ``training`` if None.
            min_ratio: skip batches whose positive-label ratio is below this
                threshold (only applied when a logger is attached and the
                batch carries a 'label' entry).
            **kwargs: forwarded to the learner call.

        Returns:
            list of per-step result dicts with numpy values.
        """
        if stage is None:
            stage = 'train' if training else 'valid'
        n_steps = len(data_gen)

        progress_bar = tqdm(data_gen,
                            total=n_steps,
                            ncols=get_tty_columns(),
                            dynamic_ncols=True,
                            desc='[%s] Loss: %.5f, Accu: %.5f' %
                            (stage, 0.0, 0.0))

        if stage not in self.step:
            self.step[stage] = 1

        result_list = []
        for batch in progress_bar:

            self.step[stage] += 1
            if self.logger is not None and 'label' in batch:
                ratio = (batch['label'] > 0).float().mean().item()
                self.logger.add_scalar('%s/quality/ratio' % stage, ratio,
                                       self.step[stage])
                if ratio < min_ratio:
                    continue

            data = {key: batch[key].cuda() for key in batch}

            if training:
                results = self.learner.learn(data, **kwargs)
            else:
                results = self.learner.infer(data, **kwargs)

            # detach all, move to CPU, and convert to numpy
            for key in results:
                results[key] = results[key].detach().cpu().numpy()

            if 'accu' in results:
                step_accu = np.nanmean(results['accu'])
            else:
                step_accu = math.nan

            if 'loss' in results:
                progress_bar.set_description(
                    '[%s] Loss: %.5f, Avg accu: %.5f' %
                    (stage, results['loss'], step_accu))

            if self.logger is not None:
                # BUG FIX: the original read results['loss'] unconditionally
                # here, raising KeyError whenever the learner returned no
                # loss — the set_description above already guarded it.
                if 'loss' in results:
                    self.logger.add_scalar('%s/step/loss' % stage,
                                           results['loss'], self.step[stage])
                self.logger.add_scalar(
                    '%s/step/accu' % stage,
                    -1 if math.isnan(step_accu) else step_accu,
                    self.step[stage])

            result_list.append(results)

        return result_list
Exemplo n.º 8
0
    def run(
        self,
        data_gen,
        training=True,
        stage=None,
        unlabeled=False,  # FIXME
        train_dis=False,  # FIXME
        min_ratio=0.,
        include_prediction=False,
    ):
        """Run one pass over ``data_gen`` with the seg/dis learners.

        Dispatches between discriminator training, unlabeled/labeled
        segmentation learning (with adversarial updates after
        ``self.start_adv``), and inference. Collects and returns the
        per-step result dicts as numpy values.
        """
        if stage is None:
            stage = 'train' if training else 'valid'

        pbar = tqdm(data_gen,
                    total=len(data_gen),
                    ncols=get_tty_columns(),
                    dynamic_ncols=True,
                    desc='[%s] Loss: %.5f, Accu: %.5f' % (stage, 0.0, 0.0))

        self.step.setdefault(stage, 1)

        collected = []
        for batch in pbar:

            self.step[stage] += 1
            if self.logger is not None and 'label' in batch:
                # skip low-quality batches with too few positive labels
                ratio = (batch['label'] > 0).float().mean().item()
                self.logger.add_scalar('%s/quality/ratio' % stage, ratio,
                                       self.step[stage])
                if ratio < min_ratio:
                    continue

            data = dict()
            for key in batch:
                data[key] = batch[key].cuda()

            # FIXME: flat dispatch over the training mode flags
            if train_dis:
                results = self.learners['seg'].infer(data)
                results.update(self.learners['dis'].learn(data))
            elif not training:
                results = self.learners['seg'].infer(
                    data,
                    include_prediction=include_prediction,
                    compute_match=(not include_prediction))
            elif unlabeled:
                results = self.learners['seg'].learn_unlabeled(data)
            else:
                results = self.learners['seg'].learn(data)
                if self.step[stage] >= self.start_adv:
                    results.update(self.learners['dis'].learn(data))

            # detach all, move to CPU, and convert to numpy
            for key in results:
                results[key] = results[key].detach().cpu().numpy()

            # NOTE(review): results['loss'] is read unconditionally below;
            # presumably every learner path returns a loss — confirm.
            step_accu = (np.nanmean(results['accu'])
                         if 'accu' in results else math.nan)

            pbar.set_description('[%s] Loss: %.5f, Avg accu: %.5f' %
                                 (stage, results['loss'], step_accu))

            if self.logger is not None:
                self.logger.add_scalar('%s/step/loss' % stage, results['loss'],
                                       self.step[stage])
                self.logger.add_scalar(
                    '%s/step/accu' % stage,
                    -1 if math.isnan(step_accu) else step_accu,
                    self.step[stage])
            collected.append(results)

        return collected