def _numerical_transforms(dispatchers, config, train_mode, suffix, **kwargs):
    """Attach a log(x + 1) transform over the numerical features.

    Args:
        dispatchers: in train mode a (train_step, valid_step) pair, otherwise
            a single step exposing 'numerical_features'.
        config: pipeline configuration; reads ``pipeline.experiment_directory``.
        train_mode (bool): when True also builds a validation step that reuses
            the train transformer.
        suffix (str): appended to step names to keep them unique.
        **kwargs: forwarded to every Step constructor.

    Returns:
        A single step, or a (train, valid) pair in train mode.
    """
    if train_mode:
        split_step, split_step_valid = dispatchers
    else:
        split_step = dispatchers

    log_num = Step(
        name='log_num{}'.format(suffix),
        # log(x + 1) keeps zero-valued features finite (log1p behaviour)
        transformer=make_transformer(lambda x: np.log(x + 1),
                                     output_name='numerical_features'),
        input_steps=[split_step],
        adapter=Adapter({'x': E(split_step.name, 'numerical_features')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)

    if not train_mode:
        return log_num

    log_num_valid = Step(
        name='log_num_valid{}'.format(suffix),
        transformer=log_num,  # share the train transformer on validation data
        input_steps=[split_step_valid],
        adapter=Adapter(
            {'x': E(split_step_valid.name, 'numerical_features')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)
    return log_num, log_num_valid
def postprocessing(model, label_encoder, config):
    """Wire model output and label decoding into a submission-producer step.

    Args:
        model: step producing 'results'.
        label_encoder: step whose fitted transformer yields 'inverse_mapping'.
        config: reads ``postprocessing.*`` params and ``env.cache_dirpath``.

    Returns:
        The final submission_producer step.
    """
    label_decoder = Step(
        name='label_decoder',
        transformer=GoogleAiLabelDecoder(),
        input_steps=[label_encoder],
        experiment_directory=config.env.cache_dirpath)

    decoder = Step(
        name='decoder',
        transformer=DataDecoder(**config.postprocessing.data_decoder),
        input_steps=[model],
        experiment_directory=config.env.cache_dirpath)

    submission_producer = Step(
        name='submission_producer',
        transformer=PredictionFormatter(
            **config.postprocessing.prediction_formatter),
        input_steps=[label_decoder, decoder],
        input_data=['input'],
        adapter=Adapter({
            'image_ids': E('input', 'img_ids'),
            'results': E(decoder.name, 'results'),
            'decoder_dict': E(label_decoder.name, 'inverse_mapping'),
        }),
        experiment_directory=config.env.cache_dirpath)
    return submission_producer
def _feature_by_type_splits(config, train_mode, suffix):
    """Split features by their inferred type.

    The original duplicated the train-step construction in both branches;
    the common step is now built once, with the validation mirror added only
    in train mode. Behaviour is unchanged.

    Args:
        config: reads ``pipeline.experiment_directory``.
        train_mode (bool): when True, also build a validation split step that
            reuses the train step's transformer.
        suffix (str): appended to step names.

    Returns:
        A single step, or a (train, valid) pair in train mode.
    """
    feature_by_type_split = Step(
        name='inferred_type_splitter{}'.format(suffix),
        transformer=fe.InferredTypeSplitter(),
        input_data=['input'],
        adapter=Adapter({'X': E('input', 'X')}),
        experiment_directory=config.pipeline.experiment_directory)

    if train_mode:
        feature_by_type_split_valid = Step(
            name='inferred_type_splitter_valid{}'.format(suffix),
            transformer=feature_by_type_split,  # shared transformer
            input_data=['input'],
            adapter=Adapter({'X': E('input', 'X_valid')}),
            experiment_directory=config.pipeline.experiment_directory)
        return feature_by_type_split, feature_by_type_split_valid

    return feature_by_type_split
def preprocessing_inference(config, model_name='unet', suffix=''):
    """Build the inference-time image loading pipeline.

    Chooses the loader config from ``config.general.loader_mode`` and the
    reader from ``loader_config.dataset_params.image_source`` ('memory' or
    'disk'); raises NotImplementedError for anything else.

    Returns:
        The loader step (output cached).
    """
    mode = config.general.loader_mode
    if mode == 'resize_and_pad':
        loader_config = config.loaders.resize_and_pad
    elif mode == 'resize':
        loader_config = config.loaders.resize
    else:
        raise NotImplementedError

    image_source = loader_config.dataset_params.image_source
    if image_source == 'memory':
        reader_inference = Step(
            name='reader_inference{}'.format(suffix),
            transformer=loaders.ImageReader(train_mode=False,
                                            **config.reader[model_name]),
            input_data=['input'],
            adapter=Adapter({'meta': E('input', 'meta')}),
            experiment_directory=config.execution.experiment_dir)
    elif image_source == 'disk':
        reader_inference = Step(
            name='xy_inference{}'.format(suffix),
            transformer=loaders.XYSplit(train_mode=False,
                                        **config.xy_splitter[model_name]),
            input_data=['input'],
            adapter=Adapter({'meta': E('input', 'meta')}),
            experiment_directory=config.execution.experiment_dir)
    else:
        raise NotImplementedError

    loader = Step(
        name='loader{}'.format(suffix),
        transformer=loaders.ImageSegmentationLoader(train_mode=False,
                                                    **loader_config),
        input_steps=[reader_inference],
        adapter=Adapter({
            'X': E(reader_inference.name, 'X'),
            'y': E(reader_inference.name, 'y'),
        }),
        experiment_directory=config.execution.experiment_dir,
        cache_output=True)
    return loader
def _projection(projection_config, data_cleaned, config, train_mode, suffix,
                **kwargs):
    """Project numerical features with a decomposition transformer.

    Args:
        projection_config: (TransformerClass, transformer_kwargs, name) triple.
        data_cleaned: cleaned-data step, or (train, valid) pair in train mode.
        config: reads ``pipeline.experiment_directory``.
        train_mode (bool): when True also builds mirrored validation steps.
        suffix (str): appended to step names.
        **kwargs: forwarded to every Step constructor.

    Returns:
        The pandas-wrapped projection step, or a (train, valid) pair.
    """
    (DecompositionTransformer,
     transformer_config,
     transformer_name) = projection_config

    if train_mode:
        data_cleaned, data_cleaned_valid = data_cleaned

    projector = Step(
        name='{}{}'.format(transformer_name, suffix),
        transformer=DecompositionTransformer(**transformer_config),
        input_steps=[data_cleaned],
        adapter=Adapter(
            {'features': E(data_cleaned.name, 'numerical_features')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)

    # Wrap the projected array back into a pandas frame with prefixed columns.
    projector_pandas = Step(
        name='{}_pandas{}'.format(transformer_name, suffix),
        transformer=make_transformer(
            partial(to_pandas, column_prefix=transformer_name),
            output_name='numerical_features'),
        input_steps=[projector],
        adapter=Adapter({'x': E(projector.name, 'features')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)

    if not train_mode:
        return projector_pandas

    projector_valid = Step(
        name='{}_valid{}'.format(transformer_name, suffix),
        transformer=projector,  # reuse the fitted decomposition
        input_steps=[data_cleaned_valid],
        adapter=Adapter(
            {'features': E(data_cleaned_valid.name, 'numerical_features')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)

    projector_pandas_valid = Step(
        name='{}_pandas_valid{}'.format(transformer_name, suffix),
        transformer=projector_pandas,
        input_steps=[projector_valid],
        adapter=Adapter({'x': E(projector_valid.name, 'features')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)

    return projector_pandas, projector_pandas_valid
def visualizer(model, label_encoder, config):
    """Build a step that renders decoded predictions onto input images.

    Returns:
        The visualizer step.
    """
    label_decoder = Step(
        name='label_decoder',
        transformer=GoogleAiLabelDecoder(),
        input_steps=[label_encoder],
        experiment_directory=config.env.cache_dirpath)

    decoder = Step(
        name='decoder',
        transformer=DataDecoder(**config.postprocessing.data_decoder),
        input_data=['input'],
        input_steps=[model],
        experiment_directory=config.env.cache_dirpath)

    visualize = Step(
        name='visualizer',
        transformer=Visualizer(),
        input_steps=[label_decoder, decoder],
        input_data=['input'],
        adapter=Adapter({
            'images_data': E('input', 'images_data'),
            'results': E(decoder.name, 'results'),
            'decoder_dict': E(label_decoder.name, 'inverse_mapping'),
        }),
        experiment_directory=config.env.cache_dirpath)
    return visualize
def _previous_applications_groupby_agg(config, train_mode, suffix, **kwargs):
    """Merge groupby aggregates of previous applications onto the main table.

    Returns:
        The aggregation step, or a (train, valid) pair in train mode.
    """
    agg = Step(
        name='previous_applications_groupby_agg{}'.format(suffix),
        transformer=fe.GroupbyAggregateMerge(**config.previous_applications),
        input_data=['application', 'previous_application'],
        adapter=Adapter({
            'main_table': E('application', 'X'),
            'side_table': E('previous_application', 'X'),
        }),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)

    if not train_mode:
        return agg

    agg_valid = Step(
        name='previous_applications_groupby_agg_valid{}'.format(suffix),
        transformer=agg,  # shared transformer; only the main table differs
        input_data=['application', 'previous_application'],
        adapter=Adapter({
            'main_table': E('application', 'X_valid'),
            'side_table': E('previous_application', 'X'),
        }),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)
    return agg, agg_valid
def _bureau(config, train_mode, **kwargs):
    """Groupby-aggregation features from the bureau table.

    The original duplicated the identical 'bureau' step construction in both
    the train and inference branches; it is now built once. Behaviour is
    unchanged.

    Returns:
        The bureau step, or a (train, valid) pair in train mode.
    """
    bureau = Step(
        name='bureau',
        transformer=fe.GroupbyAggregationFromFile(**config.bureau),
        input_data=['input'],
        adapter=Adapter({'X': E('input', 'X')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)

    if train_mode:
        bureau_valid = Step(
            name='bureau_valid',
            transformer=bureau,  # reuse the fitted train transformer
            input_data=['input'],
            adapter=Adapter({'X': E('input', 'X_valid')}),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)
        return bureau, bureau_valid

    return bureau
def preprocessing_inference_tta(config, model_name='unet', suffix=''):
    """Build the test-time-augmentation inference pipeline.

    Bug fix: the tta_generator adapter referenced the literal step name
    'reader_inference', which is wrong whenever a non-empty ``suffix`` is
    passed (the actual step is named 'reader_inference{suffix}'). It now
    references ``reader_inference.name``, matching the other builders.

    Returns:
        (loader, tta_generator) step pair.
    """
    reader_inference = Step(
        name='reader_inference{}'.format(suffix),
        transformer=loaders.XYSplit(train_mode=False,
                                    **config.xy_splitter[model_name]),
        input_data=['input'],
        adapter=Adapter({'meta': E('input', 'meta')}),
        experiment_directory=config.env.experiment_dir)

    tta_generator = Step(
        name='tta_generator{}'.format(suffix),
        transformer=loaders.MetaTestTimeAugmentationGenerator(
            **config.tta_generator),
        input_steps=[reader_inference],
        # was: E('reader_inference', 'X') -- broken for suffix != ''
        adapter=Adapter({'X': E(reader_inference.name, 'X')}),
        experiment_directory=config.env.experiment_dir)

    if config.execution.loader_mode == 'crop_and_pad':
        Loader = loaders.ImageSegmentationLoaderCropPadTTA
    elif config.execution.loader_mode == 'resize':
        Loader = loaders.ImageSegmentationLoaderResizeTTA
    else:
        raise NotImplementedError

    loader = Step(
        name='loader{}'.format(suffix),
        transformer=Loader(**config.loader),
        input_steps=[tta_generator],
        adapter=Adapter({
            'X': E(tta_generator.name, 'X_tta'),
            'tta_params': E(tta_generator.name, 'tta_params'),
        }),
        experiment_directory=config.env.experiment_dir,
        cache_output=True)
    return loader, tta_generator
def _application_groupby_agg(config, train_mode, suffix, **kwargs):
    """Groupby aggregate-diff features over the application table.

    Returns:
        The aggregation step, or a (train, valid) pair in train mode.
    """
    agg = Step(
        name='application_groupby_agg{}'.format(suffix),
        transformer=fe.GroupbyAggregateDiffs(
            **config.applications.aggregations),
        input_data=['application'],
        adapter=Adapter({'main_table': E('application', 'X')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)

    if not train_mode:
        return agg

    agg_valid = Step(
        name='application_groupby_agg_valid{}'.format(suffix),
        transformer=agg,  # shared transformer over the validation split
        input_data=['application'],
        adapter=Adapter({'main_table': E('application', 'X_valid')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)
    return agg, agg_valid
def data_cleaning_v2(config, train_mode, suffix, **kwargs):
    """Extend v1 cleaning with dummy indicators for missing values.

    Returns:
        The dummies-missing step, or a (train, valid) pair in train mode.
    """
    cleaned_data = data_cleaning_v1(config, train_mode, suffix, **kwargs)
    if train_mode:
        cleaned_data, cleaned_data_valid = cleaned_data

    impute_missing = Step(
        name='dummies_missing{}'.format(suffix),
        transformer=dc.DummiesMissing(**config.dummies_missing),
        input_steps=[cleaned_data],
        adapter=Adapter({'X': E(cleaned_data.name, 'numerical_features')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)

    if not train_mode:
        return impute_missing

    impute_missing_valid = Step(
        name='dummies_missing_valid{}'.format(suffix),
        transformer=impute_missing,  # shared transformer for validation data
        input_steps=[cleaned_data_valid],
        adapter=Adapter(
            {'X': E(cleaned_data_valid.name, 'numerical_features')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)
    return impute_missing, impute_missing_valid
def preprocessing_inference(config, model_name='unet', suffix=''):
    """Build the inference preprocessing pipeline (XY split + loader).

    Selects the loader class from ``config.execution.loader_mode``
    ('crop_and_pad' or 'resize'); raises NotImplementedError otherwise.

    Returns:
        The loader step (output cached).
    """
    mode = config.execution.loader_mode
    if mode == 'crop_and_pad':
        Loader = loaders.ImageSegmentationLoaderCropPad
    elif mode == 'resize':
        Loader = loaders.ImageSegmentationLoaderResize
    else:
        raise NotImplementedError

    reader_inference = Step(
        name='xy_inference{}'.format(suffix),
        transformer=loaders.XYSplit(train_mode=False,
                                    **config.xy_splitter[model_name]),
        input_data=['input'],
        adapter=Adapter({'meta': E('input', 'meta')}),
        experiment_directory=config.env.experiment_dir)

    loader = Step(
        name='loader{}'.format(suffix),
        transformer=Loader(train_mode=False, **config.loader),
        input_steps=[reader_inference],
        adapter=Adapter({
            'X': E(reader_inference.name, 'X'),
            'y': E(reader_inference.name, 'y'),
        }),
        experiment_directory=config.env.experiment_dir,
        cache_output=True)
    return loader
def network(config, suffix='', train_mode=True):
    """Build the emptiness-segmentation network step plus mask resizing.

    Returns:
        The mask_resize step that emits 'resized_images'.
    """
    if train_mode:
        preprocessing_step = emptiness_preprocessing_train(
            config, model_name='network', suffix=suffix)
    else:
        preprocessing_step = emptiness_preprocessing_inference(
            config, suffix=suffix)

    # renamed local (was `network`) to avoid shadowing this function's name
    seg_network = utils.FineTuneStep(
        name='network{}'.format(suffix),
        transformer=models.SegmentationModel(**config.model['network']),
        input_data=['callback_input'],
        input_steps=[preprocessing_step],
        adapter=Adapter({
            'datagen': E(preprocessing_step.name, 'datagen'),
            'validation_datagen': E(preprocessing_step.name,
                                    'validation_datagen'),
            'meta_valid': E('callback_input', 'meta_valid'),
        }),
        is_trainable=True,
        fine_tuning=config.model.network.training_config.fine_tuning,
        experiment_directory=config.execution.experiment_dir)

    mask_resize = Step(
        name='mask_resize{}'.format(suffix),
        transformer=utils.make_apply_transformer(
            partial(postprocessing.resize_emptiness_predictions,
                    target_size=config.general.original_size),
            output_name='resized_images',
            apply_on=['images']),
        input_steps=[seg_network],
        adapter=Adapter({'images': E(seg_network.name, 'mask_prediction')}),
        experiment_directory=config.execution.experiment_dir)
    return mask_resize
def _credit_card_balance_groupby_agg(config, train_mode, suffix, **kwargs):
    """Merge groupby aggregates of credit-card balances onto the main table.

    Returns:
        The aggregation step, or a (train, valid) pair in train mode.
    """
    agg = Step(
        name='credit_card_balance_groupby_agg{}'.format(suffix),
        transformer=fe.GroupbyAggregateMerge(**config.credit_card_balance),
        input_data=['application', 'credit_card_balance'],
        adapter=Adapter({
            'main_table': E('application', 'X'),
            'side_table': E('credit_card_balance', 'X'),
        }),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)

    if not train_mode:
        return agg

    agg_valid = Step(
        name='credit_card_balance_groupby_agg_valid{}'.format(suffix),
        transformer=agg,  # shared transformer; only the main table differs
        input_data=['application', 'credit_card_balance'],
        adapter=Adapter({
            'main_table': E('application', 'X_valid'),
            'side_table': E('credit_card_balance', 'X'),
        }),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)
    return agg, agg_valid
def _tap4fun(config, train_mode, suffix, **kwargs):
    """Hand-crafted tap4fun features on top of the cleaning step.

    Returns:
        The feature step, or a (train, valid) pair in train mode.
    """
    if train_mode:
        cleaning, cleaning_valid = _tap4fun_cleaning(
            config, train_mode, suffix, **kwargs)
    else:
        cleaning = _tap4fun_cleaning(config, train_mode, suffix, **kwargs)

    tap4fun = Step(
        name='tap4fun_hand_crafted{}'.format(suffix),
        transformer=fe.Tap4funFeatures(**config.tap4fun.columns),
        input_steps=[cleaning],
        adapter=Adapter({'X': E(cleaning.name, 'X')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)

    if not train_mode:
        return tap4fun

    tap4fun_valid = Step(
        name='tap4fun_hand_crafted_valid{}'.format(suffix),
        transformer=tap4fun.transformer,  # share the underlying transformer
        input_steps=[cleaning_valid],
        adapter=Adapter({'X': E(cleaning_valid.name, 'X')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)
    return tap4fun, tap4fun_valid
def _tap4fun_groupby_agg(config, train_mode, suffix, **kwargs):
    """Groupby-aggregate features over the tap4fun table.

    Returns:
        The aggregation step, or a (train, valid) pair in train mode.
    """
    agg = Step(
        name='tap4fun_groupby_agg{}'.format(suffix),
        transformer=fe.GroupbyAggregate(**config.tap4fun.aggregations),
        is_trainable=True,
        input_data=['tap4fun'],
        adapter=Adapter({'main_table': E('tap4fun', 'X')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)

    if not train_mode:
        return agg

    agg_valid = Step(
        name='tap4fun_groupby_agg_valid{}'.format(suffix),
        transformer=agg.transformer,  # share the fitted transformer
        input_data=['tap4fun'],
        adapter=Adapter({'main_table': E('tap4fun', 'X_valid')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)
    return agg, agg_valid
def select_features_from_model(features, features_valid, config, train_mode,
                               suffix, **kwargs):
    """Model-based feature selection (median-importance threshold).

    Args:
        features: step exposing 'features' to select from.
        features_valid: validation counterpart (only used in train mode).
        config: reads ``pipeline.experiment_directory``.
        train_mode (bool): when True also builds a validation selection step.
        suffix (str): appended to step names.
        **kwargs: accepted but not forwarded (kept for API symmetry).

    Returns:
        The selection step, or a (train, valid) pair in train mode.
    """
    select_step = Step(
        name='select_features_from_model{}'.format(suffix),
        transformer=fe.SelectFeaturesFromModel(threshold='median'),
        input_data=['tap4fun'],
        input_steps=[features],
        is_trainable=True,
        adapter=Adapter({
            'X': E(features.name, 'features'),
            'y': E('tap4fun', 'y'),
        }),
        experiment_directory=config.pipeline.experiment_directory,
    )

    if not train_mode:
        return select_step

    select_valid_step = Step(
        name='select_features_from_model_valid{}'.format(suffix),
        transformer=select_step.transformer,  # share the fitted selector
        input_steps=[features_valid],
        adapter=Adapter({'X': E(features_valid.name, 'features')}),
        experiment_directory=config.pipeline.experiment_directory,
    )
    return select_step, select_valid_step
def preprocessing_fillna(features, config, train_mode, suffix, **kwargs):
    """Impute missing values with a configured constant fill value.

    Args:
        features: a (train, valid) step pair in train mode, otherwise one step.
        config: reads ``preprocessing.impute_missing.fill_value`` and
            ``pipeline.experiment_directory``.
        train_mode (bool): when True the single step consumes both splits.
        suffix (str): appended to the step name.
        **kwargs: forwarded to the Step constructor.

    Returns:
        The fillna step.
    """
    # Only inputs/adapter differ between modes; build them up front so the
    # Step is constructed exactly once.
    if train_mode:
        features_train, features_valid = features
        input_steps = [features_train, features_valid]
        adapter = Adapter({
            'X': E(features_train.name, 'features'),
            'X_valid': E(features_valid.name, 'features'),
        })
    else:
        input_steps = [features]
        adapter = Adapter({'X': E(features.name, 'features')})

    return Step(
        name='fillna{}'.format(suffix),
        transformer=_fillna(config.preprocessing.impute_missing.fill_value),
        input_steps=input_steps,
        adapter=adapter,
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)
def postprocessing_pipeline_simplified(cache_dirpath, loader_mode):
    """Resize, binarize and repackage mask predictions.

    Args:
        cache_dirpath: experiment directory for every step.
        loader_mode: 'resize_and_pad' (crop back) or 'resize' (scale back);
            anything else raises NotImplementedError.

    Returns:
        The output step exposing 'y_pred'.
    """
    if loader_mode == 'resize_and_pad':
        size_adjustment_function = partial(crop_image,
                                           target_size=ORIGINAL_SIZE)
    elif loader_mode == 'resize':
        size_adjustment_function = partial(resize_image,
                                           target_size=ORIGINAL_SIZE)
    else:
        raise NotImplementedError

    mask_resize = Step(
        name='mask_resize',
        transformer=make_apply_transformer(size_adjustment_function,
                                           output_name='resized_images',
                                           apply_on=['images']),
        input_data=['unet_output'],
        adapter=Adapter({'images': E('unet_output', 'mask_prediction')}),
        experiment_directory=cache_dirpath)

    binarizer = Step(
        name='binarizer',
        transformer=make_apply_transformer(
            partial(binarize, threshold=THRESHOLD),
            output_name='binarized_images',
            apply_on=['images']),
        input_steps=[mask_resize],
        adapter=Adapter({'images': E(mask_resize.name, 'resized_images')}),
        experiment_directory=cache_dirpath)

    output = Step(
        name='output',
        transformer=IdentityOperation(),
        input_steps=[binarizer],
        adapter=Adapter({'y_pred': E(binarizer.name, 'binarized_images')}),
        experiment_directory=cache_dirpath)
    return output
def row_aggregation_features(config, train_mode, suffix, **kwargs):
    """Build one row-aggregation step per configured bucket count.

    Returns:
        A list of steps, or (train_list, valid_list) in train mode.
    """
    bucket_nrs = config.row_aggregations.bucket_nrs

    train_steps = []
    for bucket_nr in bucket_nrs:
        train_steps.append(Step(
            name='row_agg_feature_bucket_nr{}{}'.format(bucket_nr, suffix),
            transformer=fe.RowAggregationFeatures(bucket_nr=bucket_nr),
            input_data=['input'],
            adapter=Adapter({'X': E('input', 'X')}),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs))

    if not train_mode:
        return train_steps

    valid_steps = []
    for bucket_nr, train_step in zip(bucket_nrs, train_steps):
        valid_steps.append(Step(
            name='row_agg_feature_bucket_nr{}_valid{}'.format(bucket_nr,
                                                              suffix),
            transformer=train_step,  # share the train transformer per bucket
            input_data=['input'],
            adapter=Adapter({'X': E('input', 'X_valid')}),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs))
    return train_steps, valid_steps
def mask_postprocessing(config, suffix=''):
    """Resize predicted masks to the original size and binarize them.

    Returns:
        The binarizer step exposing 'binarized_images'.
    """
    mode = config.general.loader_mode
    if mode == 'crop_and_pad':
        size_adjustment_function = partial(
            crop_image, target_size=config.general.original_size)
    elif mode == 'resize':
        size_adjustment_function = partial(
            resize_image, target_size=config.general.original_size)
    else:
        raise NotImplementedError

    mask_resize = Step(
        name='mask_resize{}'.format(suffix),
        transformer=make_apply_transformer(size_adjustment_function,
                                           output_name='resized_images',
                                           apply_on=['images']),
        input_data=['input_masks'],
        adapter=Adapter({'images': E('input_masks', 'mask_prediction')}),
        experiment_directory=config.execution.experiment_dir)

    binarizer = Step(
        name='binarizer{}'.format(suffix),
        transformer=make_apply_transformer(
            partial(binarize, threshold=config.thresholder.threshold_masks),
            output_name='binarized_images',
            apply_on=['images']),
        input_steps=[mask_resize],
        adapter=Adapter({'images': E(mask_resize.name, 'resized_images')}),
        experiment_directory=config.execution.experiment_dir)
    return binarizer
def _feature_by_type_splits(config, train_mode):
    """Split the input dataframe into per-type feature groups.

    The original duplicated the identical train-step construction in both
    branches; it is now built once, with the validation mirror added only in
    train mode. Behaviour is unchanged.

    Returns:
        The split step, or a (train, valid) pair in train mode.
    """
    feature_by_type_split = Step(
        name='feature_by_type_split',
        transformer=fe.DataFrameByTypeSplitter(
            **config.dataframe_by_type_splitter),
        input_data=['input'],
        adapter=Adapter({'X': E('input', 'X')}),
        cache_dirpath=config.env.cache_dirpath)

    if train_mode:
        feature_by_type_split_valid = Step(
            name='feature_by_type_split_valid',
            transformer=feature_by_type_split,  # shared transformer
            input_data=['input'],
            adapter=Adapter({'X': E('input', 'X_valid')}),
            cache_dirpath=config.env.cache_dirpath)
        return feature_by_type_split, feature_by_type_split_valid

    return feature_by_type_split
def preprocessing_inference_tta(config, model_name='network'):
    """Build the TTA inference pipeline (resize loader only).

    Consistency fix: the tta_generator adapter referenced the literal string
    'reader_inference' instead of ``reader_inference.name``. The value is the
    same today, but the hard-coded string silently breaks if the step is ever
    renamed or suffixed; the other pipeline builders all use ``.name``.

    Returns:
        (loader, tta_generator) step pair.
    """
    if config.general.loader_mode == 'resize':
        loader_config = config.loaders.resize_tta
        LOADER = loaders.ImageSegmentationLoaderResizeTTA
    else:
        raise NotImplementedError

    reader_inference = Step(
        name='reader_inference',
        transformer=loaders.MetaReader(train_mode=False,
                                       **config.meta_reader[model_name]),
        input_data=['input'],
        adapter=Adapter({'meta': E('input', 'meta')}))

    tta_generator = Step(
        name='tta_generator',
        transformer=loaders.MetaTestTimeAugmentationGenerator(
            **config.tta_generator),
        input_steps=[reader_inference],
        # was: E('reader_inference', 'X') -- use the step's own name
        adapter=Adapter({'X': E(reader_inference.name, 'X')}))

    loader = Step(
        name='loader',
        transformer=LOADER(**loader_config),
        input_steps=[tta_generator],
        adapter=Adapter({
            'X': E(tta_generator.name, 'X_tta'),
            'tta_params': E(tta_generator.name, 'tta_params'),
        }))
    return loader, tta_generator
def train_segmentation_pipeline(config):
    """Assemble and configure the segmentation network for training.

    Returns:
        The segmentation_network step, set to train mode with the experiment
        directory pushed to all upstream steps.
    """
    preprocessing = pipelines.preprocessing_train(
        config, model_name='segmentation_network')

    segmentation_network = misc.FineTuneStep(
        name='segmentation_network',
        transformer=models.SegmentationModel(
            **config.model['segmentation_network']),
        input_data=['callback_input'],
        input_steps=[preprocessing],
        adapter=Adapter({
            'datagen': E(preprocessing.name, 'datagen'),
            'validation_datagen': E(preprocessing.name, 'validation_datagen'),
            'meta_valid': E('callback_input', 'meta_valid'),
        }))

    segmentation_network.set_mode_train()
    # Propagate the experiment directory to every upstream step at once.
    segmentation_network.set_parameters_upstream({
        'experiment_directory': config.execution.experiment_dir,
    })
    segmentation_network.force_fitting = False
    segmentation_network.fine_tuning = \
        config.model.segmentation_network.training_config.fine_tuning
    return segmentation_network
def _application(config, train_mode, suffix, **kwargs):
    """Hand-crafted application features on top of the cleaning step.

    Returns:
        The feature step, or a (train, valid) pair in train mode.
    """
    if train_mode:
        cleaning, cleaning_valid = _application_cleaning(
            config, train_mode, suffix, **kwargs)
    else:
        cleaning = _application_cleaning(config, train_mode, suffix, **kwargs)

    application = Step(
        name='application_hand_crafted{}'.format(suffix),
        transformer=fe.ApplicationFeatures(**config.applications.columns),
        input_steps=[cleaning],
        adapter=Adapter({'X': E(cleaning.name, 'X')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)

    if not train_mode:
        return application

    application_valid = Step(
        name='application_hand_crafted_valid{}'.format(suffix),
        transformer=application,  # shared transformer on the valid split
        input_steps=[cleaning_valid],
        adapter=Adapter({'X': E(cleaning_valid.name, 'X')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)
    return application, application_valid
def _application_groupby_agg(config, train_mode, suffix, **kwargs):
    """Groupby aggregate-diff features over cleaned application data.

    Returns:
        The aggregation step, or a (train, valid) pair in train mode.
    """
    if train_mode:
        cleaning, cleaning_valid = _application_cleaning(
            config, train_mode, suffix, **kwargs)
    else:
        cleaning = _application_cleaning(config, train_mode, suffix, **kwargs)

    agg = Step(
        name='application_groupby_agg{}'.format(suffix),
        transformer=fe.GroupbyAggregateDiffs(
            **config.applications.aggregations),
        input_steps=[cleaning],
        adapter=Adapter({'main_table': E(cleaning.name, 'X')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)

    if not train_mode:
        return agg

    agg_valid = Step(
        name='application_groupby_agg_valid{}'.format(suffix),
        transformer=agg,  # shared transformer on the valid cleaning output
        input_steps=[cleaning_valid],
        adapter=Adapter({'main_table': E(cleaning_valid.name, 'X')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)
    return agg, agg_valid
def _categorical_encoders(config, train_mode, suffix, **kwargs):
    """Target-aware categorical encoding of the application table.

    Returns:
        The encoder step, or a (train, valid) pair in train mode.
    """
    encoder = Step(
        name='categorical_encoder{}'.format(suffix),
        transformer=fe.CategoricalEncoder(
            **config.preprocessing.categorical_encoder),
        input_data=['application'],
        adapter=Adapter({
            'X': E('application', 'X'),
            'y': E('application', 'y'),
        }),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)

    if not train_mode:
        return encoder

    encoder_valid = Step(
        name='categorical_encoder_valid{}'.format(suffix),
        transformer=encoder,  # shared, already-fitted encoder
        input_data=['application'],
        adapter=Adapter({
            'X': E('application', 'X_valid'),
            'y': E('application', 'y_valid'),
        }),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)
    return encoder, encoder_valid
def test_adapter_creates_defined_keys(data):
    """Adapter output contains exactly the keys declared in the recipe."""
    adapter = Adapter({
        'X': [E('input_1', 'features')],
        'Y': [E('input_2', 'extra_features')],
    })
    adapted = adapter.adapt(data)
    assert set(adapted.keys()) == {'X', 'Y'}
def test_recipe_with_single_item(data):
    """A bare extractor (no list wrapper) passes the value through unchanged."""
    adapter = Adapter({
        'X': E('input_1', 'labels'),
        'Y': E('input_3', 'labels'),
    })
    adapted = adapter.adapt(data)
    assert np.array_equal(adapted['X'], data['input_1']['labels'])
    assert np.array_equal(adapted['Y'], data['input_3']['labels'])
def aggregator(name, model, tta_generator, experiment_directory, config):
    """Aggregate per-augmentation predictions back to one mask per image.

    Args:
        name: name for the aggregation step.
        model: step exposing 'mask_prediction'.
        tta_generator: step exposing 'tta_params' and 'img_ids'.
        experiment_directory: cache/persist location for the step.
        config: kwargs for TestTimeAugmentationAggregator.

    Returns:
        The aggregation step.
    """
    return Step(
        name=name,
        transformer=loaders.TestTimeAugmentationAggregator(**config),
        input_steps=[model, tta_generator],
        adapter=Adapter({
            'images': E(model.name, 'mask_prediction'),
            'tta_params': E(tta_generator.name, 'tta_params'),
            'img_ids': E(tta_generator.name, 'img_ids'),
        }),
        experiment_directory=experiment_directory)