# Shared context assumed by all snippets below: Step, Adapter and E come from
# the steppy library; make_transformer and IdentityOperation are assumed
# importable from steppy.base as in recent releases. Project-specific modules
# referenced as fe, dc, loaders, models, utils, misc, pipelines, and the
# custom transformers are repo-local and not reproduced here.
import numpy as np
from functools import partial

from steppy.adapter import Adapter, E
from steppy.base import Step


def _numerical_transforms(dispatchers, config, train_mode, suffix, **kwargs):
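    """Apply log(x + 1) to the numerical features emitted by the dispatcher.

    In train mode the validation step reuses the already-fitted train
    transformer, so the transform is fitted once and applied to both splits.
    """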
    if train_mode:
        feature_by_type_split, feature_by_type_split_valid = dispatchers
    else:
        feature_by_type_split = dispatchers

    log_num = Step(
        name='log_num{}'.format(suffix),
        transformer=make_transformer(lambda x: np.log(x + 1),
                                     output_name='numerical_features'),
        input_steps=[feature_by_type_split],
        adapter=Adapter(
            {'x': E(feature_by_type_split.name, 'numerical_features')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)

    if train_mode:
        log_num_valid = Step(
            name='log_num_valid{}'.format(suffix),
            transformer=log_num,
            input_steps=[feature_by_type_split_valid],
            adapter=Adapter({
                'x':
                E(feature_by_type_split_valid.name, 'numerical_features')
            }),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)
        return log_num, log_num_valid
    else:
        return log_num

# Example 2
def postprocessing(model, label_encoder, config):
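    """Decode model outputs and format them into a submission.

    label_decoder inverts the label encoding, decoder post-processes the raw
    model results, and submission_producer joins both with the input image ids.
    """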
    label_decoder = Step(name='label_decoder',
                         transformer=GoogleAiLabelDecoder(),
                         input_steps=[
                             label_encoder,
                         ],
                         experiment_directory=config.env.cache_dirpath)

    decoder = Step(
        name='decoder',
        transformer=DataDecoder(**config.postprocessing.data_decoder),
        input_steps=[
            model,
        ],
        experiment_directory=config.env.cache_dirpath)

    submission_producer = Step(
        name='submission_producer',
        transformer=PredictionFormatter(
            **config.postprocessing.prediction_formatter),
        input_steps=[label_decoder, decoder],
        input_data=['input'],
        adapter=Adapter({
            'image_ids':
            E('input', 'img_ids'),
            'results':
            E(decoder.name, 'results'),
            'decoder_dict':
            E(label_decoder.name, 'inverse_mapping')
        }),
        experiment_directory=config.env.cache_dirpath)
    return submission_producer
def _feature_by_type_splits(config, train_mode, suffix):
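    """Split input features by inferred column type.

    In train mode a twin step applies the fitted splitter to X_valid as well.
    """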
    if train_mode:
        feature_by_type_split = Step(
            name='inferred_type_splitter{}'.format(suffix),
            transformer=fe.InferredTypeSplitter(),
            input_data=['input'],
            adapter=Adapter({'X': E('input', 'X')}),
            experiment_directory=config.pipeline.experiment_directory)

        feature_by_type_split_valid = Step(
            name='inferred_type_splitter_valid{}'.format(suffix),
            transformer=feature_by_type_split,
            input_data=['input'],
            adapter=Adapter({'X': E('input', 'X_valid')}),
            experiment_directory=config.pipeline.experiment_directory)

        return feature_by_type_split, feature_by_type_split_valid

    else:
        feature_by_type_split = Step(
            name='inferred_type_splitter{}'.format(suffix),
            transformer=fe.InferredTypeSplitter(),
            input_data=['input'],
            adapter=Adapter({'X': E('input', 'X')}),
            experiment_directory=config.pipeline.experiment_directory)

    return feature_by_type_split
def preprocessing_inference(config, model_name='unet', suffix=''):
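    """Build the inference-time reader and image-segmentation loader.

    The reader is chosen by whether images are served from memory or read from
    disk; the loader's output is cached because downstream steps reuse it.
    """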
    if config.general.loader_mode == 'resize_and_pad':
        loader_config = config.loaders.resize_and_pad
    elif config.general.loader_mode == 'resize':
        loader_config = config.loaders.resize
    else:
        raise NotImplementedError

    if loader_config.dataset_params.image_source == 'memory':
        reader_inference = Step(name='reader_inference{}'.format(suffix),
                                transformer=loaders.ImageReader(train_mode=False, **config.reader[model_name]),
                                input_data=['input'],
                                adapter=Adapter({'meta': E('input', 'meta')}),
                                experiment_directory=config.execution.experiment_dir)

    elif loader_config.dataset_params.image_source == 'disk':
        reader_inference = Step(name='xy_inference{}'.format(suffix),
                                transformer=loaders.XYSplit(train_mode=False, **config.xy_splitter[model_name]),
                                input_data=['input'],
                                adapter=Adapter({'meta': E('input', 'meta')}),
                                experiment_directory=config.execution.experiment_dir)
    else:
        raise NotImplementedError

    loader = Step(name='loader{}'.format(suffix),
                  transformer=loaders.ImageSegmentationLoader(train_mode=False, **loader_config),
                  input_steps=[reader_inference],
                  adapter=Adapter({'X': E(reader_inference.name, 'X'),
                                   'y': E(reader_inference.name, 'y'),
                                   }),
                  experiment_directory=config.execution.experiment_dir,
                  cache_output=True)
    return loader

# Example 5
def _projection(projection_config, data_cleaned, config, train_mode, suffix, **kwargs):
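    """Project numerical features with the given decomposition transformer.

    projection_config bundles the transformer class, its kwargs, and a name;
    a follow-up step converts the projected array back to a pandas frame with
    prefixed column names.
    """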
    (DecompositionTransformer, transformer_config, transformer_name) = projection_config

    if train_mode:
        data_cleaned, data_cleaned_valid = data_cleaned

    projector = Step(name='{}{}'.format(transformer_name, suffix),
                     transformer=DecompositionTransformer(**transformer_config),
                     input_steps=[data_cleaned],
                     adapter=Adapter({'features': E(data_cleaned.name, 'numerical_features')}),
                     experiment_directory=config.pipeline.experiment_directory, **kwargs)

    projector_pandas = Step(name='{}_pandas{}'.format(transformer_name, suffix),
                            transformer=make_transformer(partial(to_pandas, column_prefix=transformer_name),
                                                         output_name='numerical_features'),
                            input_steps=[projector],
                            adapter=Adapter({'x': E(projector.name, 'features')}),
                            experiment_directory=config.pipeline.experiment_directory, **kwargs)

    if train_mode:
        projector_valid = Step(name='{}_valid{}'.format(transformer_name, suffix),
                               transformer=projector,
                               input_steps=[data_cleaned_valid],
                               adapter=Adapter({'features': E(data_cleaned_valid.name, 'numerical_features')}),
                               experiment_directory=config.pipeline.experiment_directory, **kwargs)
        projector_pandas_valid = Step(name='{}_pandas_valid{}'.format(transformer_name, suffix),
                                      transformer=projector_pandas,
                                      input_steps=[projector_valid],
                                      adapter=Adapter({'x': E(projector_valid.name, 'features')}),
                                      experiment_directory=config.pipeline.experiment_directory, **kwargs)
        return projector_pandas, projector_pandas_valid
    else:
        return projector_pandas

# Example 6
def visualizer(model, label_encoder, config):
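    """Decode model outputs and render them with the Visualizer step."""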
    label_decoder = Step(name='label_decoder',
                         transformer=GoogleAiLabelDecoder(),
                         input_steps=[
                             label_encoder,
                         ],
                         experiment_directory=config.env.cache_dirpath)

    decoder = Step(
        name='decoder',
        transformer=DataDecoder(**config.postprocessing.data_decoder),
        input_data=['input'],
        input_steps=[
            model,
        ],
        experiment_directory=config.env.cache_dirpath)

    visualize = Step(name='visualizer',
                     transformer=Visualizer(),
                     input_steps=[label_decoder, decoder],
                     input_data=['input'],
                     adapter=Adapter({
                         'images_data':
                         E('input', 'images_data'),
                         'results':
                         E(decoder.name, 'results'),
                         'decoder_dict':
                         E(label_decoder.name, 'inverse_mapping')
                     }),
                     experiment_directory=config.env.cache_dirpath)

    return visualize

# Example 7
def _previous_applications_groupby_agg(config, train_mode, suffix, **kwargs):
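    """Merge groupby aggregates of previous_application onto application.

    The validation twin reuses the fitted transformer with X_valid as the
    main table; the side table is the same in both modes.
    """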
    previous_applications_groupby_agg = Step(
        name='previous_applications_groupby_agg{}'.format(suffix),
        transformer=fe.GroupbyAggregateMerge(**config.previous_applications),
        input_data=['application', 'previous_application'],
        adapter=Adapter({
            'main_table': E('application', 'X'),
            'side_table': E('previous_application', 'X')
        }),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)
    if train_mode:
        previous_applications_groupby_agg_valid = Step(
            name='previous_applications_groupby_agg_valid{}'.format(suffix),
            transformer=previous_applications_groupby_agg,
            input_data=['application', 'previous_application'],
            adapter=Adapter({
                'main_table': E('application', 'X_valid'),
                'side_table': E('previous_application', 'X')
            }),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)
        return previous_applications_groupby_agg, previous_applications_groupby_agg_valid
    else:
        return previous_applications_groupby_agg

# Example 8
def _bureau(config, train_mode, **kwargs):
    if train_mode:
        bureau = Step(name='bureau',
                      transformer=fe.GroupbyAggregationFromFile(**config.bureau),
                      input_data=['input'],
                      adapter=Adapter({'X': E('input', 'X')}),
                      experiment_directory=config.pipeline.experiment_directory,
                      **kwargs)

        bureau_valid = Step(name='bureau_valid',
                            transformer=bureau,
                            input_data=['input'],
                            adapter=Adapter({'X': E('input', 'X_valid')}),
                            experiment_directory=config.pipeline.experiment_directory,
                            **kwargs)

        return bureau, bureau_valid

    else:
        bureau = Step(name='bureau',
                      transformer=fe.GroupbyAggregationFromFile(**config.bureau),
                      input_data=['input'],
                      adapter=Adapter({'X': E('input', 'X')}),
                      experiment_directory=config.pipeline.experiment_directory,
                      **kwargs)

        return bureau

# Example 9
def preprocessing_inference_tta(config, model_name='unet', suffix=''):
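    """Build the test-time-augmentation reader, generator, and loader.

    Returns both the loader and the tta_generator so an aggregation step can
    later regroup augmented predictions by image id and tta_params.
    """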
    reader_inference = Step(name='reader_inference{}'.format(suffix),
                            transformer=loaders.XYSplit(
                                train_mode=False,
                                **config.xy_splitter[model_name]),
                            input_data=['input'],
                            adapter=Adapter({'meta': E('input', 'meta')}),
                            experiment_directory=config.env.experiment_dir)

    tta_generator = Step(name='tta_generator{}'.format(suffix),
                         transformer=loaders.MetaTestTimeAugmentationGenerator(
                             **config.tta_generator),
                         input_steps=[reader_inference],
                         # reference the reader by .name so the suffix is included
                         adapter=Adapter({'X': E(reader_inference.name, 'X')}),
                         experiment_directory=config.env.experiment_dir)

    if config.execution.loader_mode == 'crop_and_pad':
        Loader = loaders.ImageSegmentationLoaderCropPadTTA
    elif config.execution.loader_mode == 'resize':
        Loader = loaders.ImageSegmentationLoaderResizeTTA
    else:
        raise NotImplementedError

    loader = Step(name='loader{}'.format(suffix),
                  transformer=Loader(**config.loader),
                  input_steps=[tta_generator],
                  adapter=Adapter({
                      'X':
                      E(tta_generator.name, 'X_tta'),
                      'tta_params':
                      E(tta_generator.name, 'tta_params'),
                  }),
                  experiment_directory=config.env.experiment_dir,
                  cache_output=True)
    return loader, tta_generator
def _application_groupby_agg(config, train_mode, suffix, **kwargs):
    application_groupby_agg = Step(
        name='application_groupby_agg{}'.format(suffix),
        transformer=fe.GroupbyAggregateDiffs(
            **config.applications.aggregations),
        input_data=['application'],
        adapter=Adapter({'main_table': E('application', 'X')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)

    if train_mode:

        application_groupby_agg_valid = Step(
            name='application_groupby_agg_valid{}'.format(suffix),
            transformer=application_groupby_agg,
            input_data=['application'],
            adapter=Adapter({
                'main_table': E('application', 'X_valid'),
            }),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)

        return application_groupby_agg, application_groupby_agg_valid

    else:
        return application_groupby_agg

# Example 11
def data_cleaning_v2(config, train_mode, suffix, **kwargs):
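    """Extend v1 cleaning with dummy indicators for missing values."""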
    cleaned_data = data_cleaning_v1(config, train_mode, suffix, **kwargs)

    if train_mode:
        cleaned_data, cleaned_data_valid = cleaned_data

    impute_missing = Step(name='dummies_missing{}'.format(suffix),
                          transformer=dc.DummiesMissing(**config.dummies_missing),
                          input_steps=[cleaned_data],
                          adapter=Adapter({'X': E(cleaned_data.name, 'numerical_features')}),
                          experiment_directory=config.pipeline.experiment_directory, **kwargs)

    if train_mode:
        impute_missing_valid = Step(name='dummies_missing_valid{}'.format(suffix),
                                    transformer=impute_missing,
                                    input_steps=[cleaned_data_valid],
                                    adapter=Adapter({'X': E(cleaned_data_valid.name, 'numerical_features')}),
                                    experiment_directory=config.pipeline.experiment_directory, **kwargs)
        return impute_missing, impute_missing_valid
    else:
        return impute_missing

# Example 12
def preprocessing_inference(config, model_name='unet', suffix=''):
    if config.execution.loader_mode == 'crop_and_pad':
        Loader = loaders.ImageSegmentationLoaderCropPad
    elif config.execution.loader_mode == 'resize':
        Loader = loaders.ImageSegmentationLoaderResize
    else:
        raise NotImplementedError

    reader_inference = Step(name='xy_inference{}'.format(suffix),
                            transformer=loaders.XYSplit(
                                train_mode=False,
                                **config.xy_splitter[model_name]),
                            input_data=['input'],
                            adapter=Adapter({'meta': E('input', 'meta')}),
                            experiment_directory=config.env.experiment_dir)

    loader = Step(name='loader{}'.format(suffix),
                  transformer=Loader(train_mode=False, **config.loader),
                  input_steps=[reader_inference],
                  adapter=Adapter({
                      'X': E(reader_inference.name, 'X'),
                      'y': E(reader_inference.name, 'y'),
                  }),
                  experiment_directory=config.env.experiment_dir,
                  cache_output=True)
    return loader
def network(config, suffix='', train_mode=True):
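    """Build the emptiness segmentation network and resize its predicted
    masks back to the original image size."""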
    if train_mode:
        preprocessing = emptiness_preprocessing_train(config, model_name='network', suffix=suffix)
    else:
        preprocessing = emptiness_preprocessing_inference(config, suffix=suffix)

    network = utils.FineTuneStep(name='network{}'.format(suffix),
                                 transformer=models.SegmentationModel(**config.model['network']),
                                 input_data=['callback_input'],
                                 input_steps=[preprocessing],
                                 adapter=Adapter({'datagen': E(preprocessing.name, 'datagen'),
                                                  'validation_datagen': E(preprocessing.name, 'validation_datagen'),
                                                  'meta_valid': E('callback_input', 'meta_valid'),
                                                  }),
                                 is_trainable=True,
                                 fine_tuning=config.model.network.training_config.fine_tuning,
                                 experiment_directory=config.execution.experiment_dir)

    mask_resize = Step(name='mask_resize{}'.format(suffix),
                       transformer=utils.make_apply_transformer(partial(postprocessing.resize_emptiness_predictions,
                                                                        target_size=config.general.original_size),
                                                                output_name='resized_images',
                                                                apply_on=['images']),
                       input_steps=[network],
                       adapter=Adapter({'images': E(network.name, 'mask_prediction'),
                                        }),
                       experiment_directory=config.execution.experiment_dir)

    return mask_resize

# Example 14
def _credit_card_balance_groupby_agg(config, train_mode, suffix, **kwargs):
    credit_card_balance_groupby_agg = Step(
        name='credit_card_balance_groupby_agg{}'.format(suffix),
        transformer=fe.GroupbyAggregateMerge(**config.credit_card_balance),
        input_data=['application', 'credit_card_balance'],
        adapter=Adapter({
            'main_table': E('application', 'X'),
            'side_table': E('credit_card_balance', 'X')
        }),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)
    if train_mode:
        credit_card_balance_groupby_agg_valid = Step(
            name='credit_card_balance_groupby_agg_valid{}'.format(suffix),
            transformer=credit_card_balance_groupby_agg,
            input_data=['application', 'credit_card_balance'],
            adapter=Adapter({
                'main_table': E('application', 'X_valid'),
                'side_table': E('credit_card_balance', 'X')
            }),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)
        return credit_card_balance_groupby_agg, credit_card_balance_groupby_agg_valid

    else:
        return credit_card_balance_groupby_agg

# Example 15
def _tap4fun(config, train_mode, suffix, **kwargs):
    if train_mode:
        tap4fun_cleaning, tap4fun_cleaning_valid = _tap4fun_cleaning(
            config, train_mode, suffix, **kwargs)
    else:
        tap4fun_cleaning = _tap4fun_cleaning(config, train_mode, suffix,
                                             **kwargs)

    tap4fun = Step(name='tap4fun_hand_crafted{}'.format(suffix),
                   transformer=fe.Tap4funFeatures(**config.tap4fun.columns),
                   input_steps=[tap4fun_cleaning],
                   adapter=Adapter({'X': E(tap4fun_cleaning.name, 'X')}),
                   experiment_directory=config.pipeline.experiment_directory,
                   **kwargs)
    if train_mode:
        tap4fun_valid = Step(
            name='tap4fun_hand_crafted_valid{}'.format(suffix),
            transformer=tap4fun.transformer,
            input_steps=[tap4fun_cleaning_valid],
            adapter=Adapter({'X': E(tap4fun_cleaning_valid.name, 'X')}),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)
        return tap4fun, tap4fun_valid
    else:
        return tap4fun

# Example 16
def _tap4fun_groupby_agg(config, train_mode, suffix, **kwargs):
    tap4fun_groupby_agg = Step(
        name='tap4fun_groupby_agg{}'.format(suffix),
        transformer=fe.GroupbyAggregate(**config.tap4fun.aggregations),
        is_trainable=True,
        input_data=['tap4fun'],
        adapter=Adapter({'main_table': E('tap4fun', 'X')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)

    if train_mode:
        tap4fun_groupby_agg_valid = Step(
            name='tap4fun_groupby_agg_valid{}'.format(suffix),
            transformer=tap4fun_groupby_agg.transformer,
            input_data=['tap4fun'],
            adapter=Adapter({
                'main_table': E('tap4fun', 'X_valid'),
            }),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)

        return tap4fun_groupby_agg, tap4fun_groupby_agg_valid

    else:
        return tap4fun_groupby_agg

# Example 17
def select_features_from_model(features, features_valid, config, train_mode,
                               suffix, **kwargs):
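    """Select features using a fitted model's importances (median threshold).

    The validation step shares the fitted selector via .transformer instead
    of fitting a second time.
    """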
    select_features_step = Step(
        name='select_features_from_model{}'.format(suffix),
        transformer=fe.SelectFeaturesFromModel(threshold='median'),
        input_data=['tap4fun'],
        input_steps=[features],
        is_trainable=True,
        adapter=Adapter({
            'X': E(features.name, 'features'),
            'y': E('tap4fun', 'y')
        }),
        experiment_directory=config.pipeline.experiment_directory,
    )
    if train_mode:
        select_features_valid_step = Step(
            name='select_features_from_model_valid{}'.format(suffix),
            transformer=select_features_step.transformer,
            input_steps=[features_valid],
            adapter=Adapter({'X': E(features_valid.name, 'features')}),
            experiment_directory=config.pipeline.experiment_directory,
        )
        return select_features_step, select_features_valid_step
    else:
        return select_features_step

# Example 18
def preprocessing_fillna(features, config, train_mode, suffix, **kwargs):
    """
        impute missing value by condition
    """
    if train_mode:
        features_train, features_valid = features
        fillna = Step(
            name='fillna{}'.format(suffix),
            transformer=_fillna(
                config.preprocessing.impute_missing.fill_value),
            input_steps=[features_train, features_valid],
            adapter=Adapter({
                'X': E(features_train.name, 'features'),
                'X_valid': E(features_valid.name, 'features'),
            }),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)
    else:
        fillna = Step(
            name='fillna{}'.format(suffix),
            transformer=_fillna(
                config.preprocessing.impute_missing.fill_value),
            input_steps=[features],
            adapter=Adapter({'X': E(features.name, 'features')}),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)
    return fillna
def postprocessing_pipeline_simplified(cache_dirpath, loader_mode):
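    """Resize (or crop) predicted masks back to ORIGINAL_SIZE, binarize them
    at THRESHOLD, and expose the result as y_pred. Both constants are assumed
    to be module-level settings."""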
    if loader_mode == 'resize_and_pad':
        size_adjustment_function = partial(crop_image, target_size=ORIGINAL_SIZE)
    elif loader_mode == 'resize':
        size_adjustment_function = partial(resize_image, target_size=ORIGINAL_SIZE)
    else:
        raise NotImplementedError

    mask_resize = Step(name='mask_resize',
                       transformer=make_apply_transformer(size_adjustment_function,
                                                          output_name='resized_images',
                                                          apply_on=['images']),
                       input_data=['unet_output'],
                       adapter=Adapter({'images': E('unet_output', 'mask_prediction'),
                                        }),
                       experiment_directory=cache_dirpath)

    binarizer = Step(name='binarizer',
                     transformer=make_apply_transformer(
                         partial(binarize, threshold=THRESHOLD),
                         output_name='binarized_images',
                         apply_on=['images']),
                     input_steps=[mask_resize],
                     adapter=Adapter({'images': E(mask_resize.name, 'resized_images'),
                                      }),
                     experiment_directory=cache_dirpath)

    output = Step(name='output',
                  transformer=IdentityOperation(),
                  input_steps=[binarizer],
                  adapter=Adapter({'y_pred': E(binarizer.name, 'binarized_images'),
                                   }),
                  experiment_directory=cache_dirpath)

    return output

# Example 20
def row_aggregation_features(config, train_mode, suffix, **kwargs):
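    """Create one row-aggregation step per configured bucket count."""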
    bucket_nrs = config.row_aggregations.bucket_nrs
    row_agg_features = []
    for bucket_nr in bucket_nrs:
        row_agg_feature = Step(
            name='row_agg_feature_bucket_nr{}{}'.format(bucket_nr, suffix),
            transformer=fe.RowAggregationFeatures(bucket_nr=bucket_nr),
            input_data=['input'],
            adapter=Adapter({'X': E('input', 'X')}),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)
        row_agg_features.append(row_agg_feature)

    if train_mode:
        row_agg_features_valid = []
        for bucket_nr, row_agg_feature in zip(bucket_nrs, row_agg_features):
            row_agg_feature_valid = Step(
                name='row_agg_feature_bucket_nr{}_valid{}'.format(
                    bucket_nr, suffix),
                transformer=row_agg_feature,
                input_data=['input'],
                adapter=Adapter({'X': E('input', 'X_valid')}),
                experiment_directory=config.pipeline.experiment_directory,
                **kwargs)
            row_agg_features_valid.append(row_agg_feature_valid)

        return row_agg_features, row_agg_features_valid
    else:
        return row_agg_features
def mask_postprocessing(config, suffix=''):
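    """Resize predicted masks to the original size and binarize them."""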
    if config.general.loader_mode == 'crop_and_pad':
        size_adjustment_function = partial(
            crop_image, target_size=config.general.original_size)
    elif config.general.loader_mode == 'resize':
        size_adjustment_function = partial(
            resize_image, target_size=config.general.original_size)
    else:
        raise NotImplementedError

    mask_resize = Step(name='mask_resize{}'.format(suffix),
                       transformer=make_apply_transformer(
                           size_adjustment_function,
                           output_name='resized_images',
                           apply_on=['images']),
                       input_data=['input_masks'],
                       adapter=Adapter({
                           'images':
                           E('input_masks', 'mask_prediction'),
                       }),
                       experiment_directory=config.execution.experiment_dir)

    binarizer = Step(name='binarizer{}'.format(suffix),
                     transformer=make_apply_transformer(
                         partial(binarize,
                                 threshold=config.thresholder.threshold_masks),
                         output_name='binarized_images',
                         apply_on=['images']),
                     input_steps=[mask_resize],
                     adapter=Adapter({
                         'images':
                         E(mask_resize.name, 'resized_images'),
                     }),
                     experiment_directory=config.execution.experiment_dir)
    return binarizer

# Example 22
def _feature_by_type_splits(config, train_mode):
    if train_mode:
        feature_by_type_split = Step(name='feature_by_type_split',
                                     transformer=fe.DataFrameByTypeSplitter(
                                         **config.dataframe_by_type_splitter),
                                     input_data=['input'],
                                     adapter=Adapter({'X': E('input', 'X')}),
                                     cache_dirpath=config.env.cache_dirpath)

        feature_by_type_split_valid = Step(
            name='feature_by_type_split_valid',
            transformer=feature_by_type_split,
            input_data=['input'],
            adapter=Adapter({'X': E('input', 'X_valid')}),
            cache_dirpath=config.env.cache_dirpath)

        return feature_by_type_split, feature_by_type_split_valid

    else:
        feature_by_type_split = Step(name='feature_by_type_split',
                                     transformer=fe.DataFrameByTypeSplitter(
                                         **config.dataframe_by_type_splitter),
                                     input_data=['input'],
                                     adapter=Adapter({'X': E('input', 'X')}),
                                     cache_dirpath=config.env.cache_dirpath)

    return feature_by_type_split

# Example 23
def preprocessing_inference_tta(config, model_name='network'):
    if config.general.loader_mode == 'resize':
        loader_config = config.loaders.resize_tta
        LOADER = loaders.ImageSegmentationLoaderResizeTTA
    else:
        raise NotImplementedError

    reader_inference = Step(name='reader_inference',
                            transformer=loaders.MetaReader(
                                train_mode=False,
                                **config.meta_reader[model_name]),
                            input_data=['input'],
                            adapter=Adapter({'meta': E('input', 'meta')}))

    tta_generator = Step(name='tta_generator',
                         transformer=loaders.MetaTestTimeAugmentationGenerator(
                             **config.tta_generator),
                         input_steps=[reader_inference],
                         adapter=Adapter({'X': E(reader_inference.name, 'X')}))

    loader = Step(name='loader',
                  transformer=LOADER(**loader_config),
                  input_steps=[tta_generator],
                  adapter=Adapter({
                      'X':
                      E(tta_generator.name, 'X_tta'),
                      'tta_params':
                      E(tta_generator.name, 'tta_params'),
                  }))
    return loader, tta_generator

# Example 24
def train_segmentation_pipeline(config):
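    """Assemble the trainable segmentation pipeline.

    Parameters such as the experiment directory are pushed to all upstream
    steps via set_parameters_upstream instead of being passed per step.
    """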
    preprocessing = pipelines.preprocessing_train(
        config, model_name='segmentation_network')

    segmentation_network = misc.FineTuneStep(
        name='segmentation_network',
        transformer=models.SegmentationModel(
            **config.model['segmentation_network']),
        input_data=['callback_input'],
        input_steps=[preprocessing],
        adapter=Adapter({
            'datagen':
            E(preprocessing.name, 'datagen'),
            'validation_datagen':
            E(preprocessing.name, 'validation_datagen'),
            'meta_valid':
            E('callback_input', 'meta_valid'),
        }))

    segmentation_network.set_mode_train()
    segmentation_network.set_parameters_upstream({
        'experiment_directory':
        config.execution.experiment_dir,
    })
    segmentation_network.force_fitting = False
    segmentation_network.fine_tuning = config.model.segmentation_network.training_config.fine_tuning
    return segmentation_network
def _application(config, train_mode, suffix, **kwargs):
    if train_mode:
        application_cleaning, application_cleaning_valid = _application_cleaning(
            config, train_mode, suffix, **kwargs)
    else:
        application_cleaning = _application_cleaning(config, train_mode,
                                                     suffix, **kwargs)

    application = Step(
        name='application_hand_crafted{}'.format(suffix),
        transformer=fe.ApplicationFeatures(**config.applications.columns),
        input_steps=[application_cleaning],
        adapter=Adapter({'X': E(application_cleaning.name, 'X')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)
    if train_mode:
        application_valid = Step(
            name='application_hand_crafted_valid{}'.format(suffix),
            transformer=application,
            input_steps=[application_cleaning_valid],
            adapter=Adapter({'X': E(application_cleaning_valid.name, 'X')}),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)
        return application, application_valid
    else:
        return application
def _application_groupby_agg(config, train_mode, suffix, **kwargs):
    if train_mode:
        application_cleaning, application_cleaning_valid = _application_cleaning(
            config, train_mode, suffix, **kwargs)
    else:
        application_cleaning = _application_cleaning(config, train_mode,
                                                     suffix, **kwargs)

    application_groupby_agg = Step(
        name='application_groupby_agg{}'.format(suffix),
        transformer=fe.GroupbyAggregateDiffs(
            **config.applications.aggregations),
        input_steps=[application_cleaning],
        adapter=Adapter({'main_table': E(application_cleaning.name, 'X')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)

    if train_mode:

        application_groupby_agg_valid = Step(
            name='application_groupby_agg_valid{}'.format(suffix),
            transformer=application_groupby_agg,
            input_steps=[application_cleaning_valid],
            adapter=Adapter(
                {'main_table': E(application_cleaning_valid.name, 'X')}),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)

        return application_groupby_agg, application_groupby_agg_valid

    else:
        return application_groupby_agg
def _categorical_encoders(config, train_mode, suffix, **kwargs):
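    """Fit a categorical encoder on the application table.

    The validation twin reuses the fitted encoder on X_valid / y_valid.
    """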
    categorical_encoder = Step(
        name='categorical_encoder{}'.format(suffix),
        transformer=fe.CategoricalEncoder(
            **config.preprocessing.categorical_encoder),
        input_data=['application'],
        adapter=Adapter({
            'X': E('application', 'X'),
            'y': E('application', 'y')
        }),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)
    if train_mode:
        categorical_encoder_valid = Step(
            name='categorical_encoder_valid{}'.format(suffix),
            transformer=categorical_encoder,
            input_data=['application'],
            adapter=Adapter({
                'X': E('application', 'X_valid'),
                'y': E('application', 'y_valid')
            }),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)
        return categorical_encoder, categorical_encoder_valid
    else:
        return categorical_encoder

# Example 28
def test_adapter_creates_defined_keys(data):
    adapter = Adapter({
        'X': [E('input_1', 'features')],
        'Y': [E('input_2', 'extra_features')]
    })
    res = adapter.adapt(data)

    assert {'X', 'Y'} == set(res.keys())

# Example 29
def test_recipe_with_single_item(data):
    adapter = Adapter({
        'X': E('input_1', 'labels'),
        'Y': E('input_3', 'labels'),
    })
    res = adapter.adapt(data)

    assert np.array_equal(res['X'], data['input_1']['labels'])
    assert np.array_equal(res['Y'], data['input_3']['labels'])
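
# A minimal sketch of the Adapter semantics the two tests above exercise,
# assuming steppy's Adapter/E as imported at the top: a bare E recipe yields
# the extracted value itself, while a list of E recipes yields a list.
def adapter_demo():
    data = {
        'input_1': {'features': [1, 2], 'labels': ['a', 'b']},
        'input_2': {'extra_features': [3, 4]},
    }
    single = Adapter({'X': E('input_1', 'labels')}).adapt(data)
    listed = Adapter({'X': [E('input_1', 'features'),
                            E('input_2', 'extra_features')]}).adapt(data)
    assert single['X'] == ['a', 'b']
    assert listed['X'] == [[1, 2], [3, 4]]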

# Example 30
def aggregator(name, model, tta_generator, experiment_directory, config):
    tta_aggregator = Step(name=name,
                          transformer=loaders.TestTimeAugmentationAggregator(**config),
                          input_steps=[model, tta_generator],
                          adapter=Adapter({'images': E(model.name, 'mask_prediction'),
                                           'tta_params': E(tta_generator.name, 'tta_params'),
                                           'img_ids': E(tta_generator.name, 'img_ids'),
                                           }),
                          experiment_directory=experiment_directory)
    return tta_aggregator
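
# Hedged usage sketch (step names and the config key are hypothetical): the
# aggregator is meant to consume a model step fed by the loader from
# preprocessing_inference_tta, plus the tta_generator it returns, regrouping
# augmented predictions per image id.
#
#   loader, tta_generator = preprocessing_inference_tta(config)
#   network = ...  # model step that consumes `loader`
#   tta_agg = aggregator('tta_aggregator', network, tta_generator,
#                        experiment_directory=config.env.experiment_dir,
#                        config=config.tta_aggregator)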