def unet(config, train_mode):
    save_output = False
    load_saved_output = False

    loader = preprocessing(config, model_type='single', is_train=train_mode)
    unet = Step(name='unet',
                transformer=PyTorchUNet(**config.unet),
                input_steps=[loader],
                cache_dirpath=config.env.cache_dirpath,
                save_output=save_output,
                load_saved_output=load_saved_output)

    mask_postprocessed = mask_postprocessing(unet,
                                             config,
                                             save_output=save_output)
    detached = multiclass_object_labeler(mask_postprocessed,
                                         config,
                                         save_output=save_output)
    output = Step(name='output',
                  transformer=Dummy(),
                  input_steps=[detached],
                  adapter={
                      'y_pred': ([(detached.name, 'labeled_images')]),
                  },
                  cache_dirpath=config.env.cache_dirpath,
                  save_output=save_output,
                  load_saved_output=False)
    return output
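
# None of these snippets show how Step resolves its 'adapter' mappings. The
# sketch below is only an illustration of the apparent convention, not the
# library's actual code: each entry maps an argument name to a list of
# (step_name, output_key) pairs, optionally followed by a function that
# combines the gathered values.
def resolve_adapter(adapter, upstream_outputs):
    adapted = {}
    for arg_name, spec in adapter.items():
        if isinstance(spec, tuple) and callable(spec[-1]):
            keys, combine = spec
        else:
            # A bare list forwards the single gathered value unchanged.
            keys, combine = spec, lambda gathered: gathered[0]
        gathered = [upstream_outputs[step_name][output_key]
                    for step_name, output_key in keys]
        adapted[arg_name] = combine(gathered)
    return adapted
# For the 'output' step above this would simply forward the labeler's
# 'labeled_images' result under the new name 'y_pred'.
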
Example #2
def watershed_contours(mask, contour, config, save_output=True):
    watershed_contour = Step(name='watershed_contour',
                             transformer=WatershedContour(),
                             input_steps=[mask, contour],
                             adapter={
                                 'images': ([(mask.name, 'binarized_images')]),
                                 'contours':
                                 ([(contour.name, 'binarized_images')]),
                             },
                             cache_dirpath=config.env.cache_dirpath,
                             save_output=save_output)

    drop_smaller = Step(name='drop_smaller',
                        transformer=Dropper(**config.dropper),
                        input_steps=[watershed_contour],
                        adapter={
                            'labels':
                            ([('watershed_contour', 'detached_images')]),
                        },
                        cache_dirpath=config.env.cache_dirpath,
                        save_output=save_output)

    binary_fill = Step(name='binary_fill',
                       transformer=BinaryFillHoles(),
                       input_steps=[drop_smaller],
                       adapter={
                           'images': ([('drop_smaller', 'labels')]),
                       },
                       cache_dirpath=config.env.cache_dirpath,
                       save_output=save_output)

    return binary_fill
Example #3
def _normalize(features, config, train_mode, **kwargs):
    if train_mode:
        features_train, features_valid = features
        normalizer = Step(name='normalizer',
                          transformer=Normalizer(),
                          input_steps=[features_train],
                          adapter={
                              'X': ([(features_train.name, 'features')]),
                          },
                          cache_dirpath=config.env.cache_dirpath,
                          **kwargs)

        normalizer_valid = Step(name='normalizer_valid',
                                transformer=normalizer,
                                input_steps=[features_valid],
                                adapter={
                                    'X': ([(features_valid.name, 'features')]),
                                },
                                cache_dirpath=config.env.cache_dirpath,
                                **kwargs)

        return normalizer, normalizer_valid

    else:
        normalizer = Step(name='normalizer',
                          transformer=Normalizer(),
                          input_steps=[features],
                          adapter={
                              'X': ([(features.name, 'features')]),
                          },
                          cache_dirpath=config.env.cache_dirpath,
                          **kwargs)

        return normalizer
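
# The Normalizer transformer is not shown in these snippets. A minimal sketch of
# the idea, assuming a scikit-learn StandardScaler underneath (an assumption,
# not the repository's actual implementation):
from sklearn.preprocessing import StandardScaler

class Normalizer:
    def __init__(self):
        self.scaler = StandardScaler()

    def fit_transform(self, X):
        # Fit on the training features and expose them under the output name
        # that downstream adapters reference ('X').
        return {'X': self.scaler.fit_transform(X)}

    def transform(self, X):
        # Because the fitted 'normalizer' step is reused as the transformer of
        # 'normalizer_valid', validation features are scaled with statistics
        # learned on the training split.
        return {'X': self.scaler.transform(X)}
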
Example #4
def tfidf_logreg(config):
    preprocessed_input = _preprocessing(config, is_train=False)
    tfidf_char_vectorizer, tfidf_word_vectorizer = _tfidf(
        preprocessed_input, config)

    tfidf_logreg = Step(name='tfidf_logreg',
                        transformer=LogisticRegressionMultilabel(
                            **config.logistic_regression_multilabel),
                        input_steps=[
                            preprocessed_input, tfidf_char_vectorizer,
                            tfidf_word_vectorizer
                        ],
                        adapter={
                            'X': ([('tfidf_char_vectorizer', 'features'),
                                   ('tfidf_word_vectorizer', 'features')],
                                  sparse_hstack_inputs),
                            'y': ([('cleaning_output', 'y')]),
                        },
                        cache_dirpath=config.env.cache_dirpath)
    output = Step(name='tfidf_logreg_output',
                  transformer=Dummy(),
                  input_steps=[tfidf_logreg],
                  adapter={
                      'y_pred': ([('tfidf_logreg', 'prediction_probability')]),
                  },
                  cache_dirpath=config.env.cache_dirpath)
    return output
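
# sparse_hstack_inputs is referenced above but not defined in these snippets. A
# plausible implementation (an assumption) simply stacks the gathered sparse
# feature matrices column-wise before they reach the classifier:
from scipy import sparse

def sparse_hstack_inputs(inputs):
    """Stack a list of sparse feature matrices horizontally into one matrix."""
    return sparse.hstack(inputs).tocsr()
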
Example #5
def bad_word_count_features_logreg(config):
    preprocessed_input = _preprocessing(config, is_train=False)
    normalizer = _count_features(config)
    xy_split = normalizer.get_step('xy_split')
    tfidf_word_vectorizer = _bad_word_tfidf(preprocessed_input, config)

    bad_word_count_logreg = Step(
        name='bad_word_count_logreg',
        transformer=LogisticRegressionMultilabel(
            **config.logistic_regression_multilabel),
        input_steps=[xy_split, normalizer, tfidf_word_vectorizer],
        adapter={
            'X': ([('normalizer', 'X'),
                   ('bad_word_tfidf_word_vectorizer', 'features')],
                  sparse_hstack_inputs),
            'y': ([('xy_split', 'y')]),
        },
        cache_dirpath=config.env.cache_dirpath)

    output = Step(name='bad_word_count_features_logreg_output',
                  transformer=Dummy(),
                  input_steps=[bad_word_count_logreg],
                  adapter={
                      'y_pred':
                      ([('bad_word_count_logreg', 'prediction_probability')]),
                  },
                  cache_dirpath=config.env.cache_dirpath)
    return output
Example #6
def unet(config, train_mode):
    save_output = False
    load_saved_output = False

    loader = preprocessing(config, model_type='single', is_train=train_mode)
    unet = Step(name='unet',
                transformer=PyTorchUNetStream(**config.unet) if config.execution.stream_mode else PyTorchUNet(
                    **config.unet),
                input_steps=[loader],
                cache_dirpath=config.env.cache_dirpath,
                save_output=save_output, load_saved_output=load_saved_output)

    mask_postprocessed = mask_postprocessing(loader, unet, config, save_output=save_output)

    output = Step(name='output',
                  transformer=Dummy(),
                  input_steps=[mask_postprocessed],
                  adapter={'y_pred': ([(mask_postprocessed.name, 'images')]),
                           'y_scores': ([(mask_postprocessed.name, 'scores')])
                           },
                  cache_dirpath=config.env.cache_dirpath,
                  save_output=save_output,
                  load_saved_output=False)
    return output
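
# Dummy appears in every 'output' step but is never defined here; it is
# presumably a pass-through transformer, so the adapter alone performs the
# renaming. A sketch along those lines (an assumption, not the actual helper):
class Dummy:
    def fit_transform(self, **kwargs):
        return self.transform(**kwargs)

    def transform(self, **kwargs):
        # Return whatever the adapter assembled, unchanged.
        return kwargs
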
def inference_preprocessing(config):
    xy_train = Step(name='xy_train',
                    transformer=XYSplit(**config.xy_splitter),
                    input_data=['input'],
                    adapter={'meta': ([('input', 'meta')]),
                             'train_mode': ([('input', 'train_mode')])
                             },
                    cache_dirpath=config.env.cache_dirpath)

    text_cleaner = Step(name='text_cleaner_train',
                        transformer=TextCleaner(**config.text_cleaner),
                        input_steps=[xy_train],
                        adapter={'X': ([('xy_train', 'X')])},
                        cache_dirpath=config.env.cache_dirpath)

    cleaning_output = Step(name='cleaning_output',
                           transformer=Dummy(),
                           input_data=['input'],
                           input_steps=[xy_train, text_cleaner],
                           adapter={'X': ([('text_cleaner_train', 'X')]),
                                    'y': ([('xy_train', 'y')]),
                                    'train_mode': ([('input', 'train_mode')]),
                                    },
                           cache_dirpath=config.env.cache_dirpath)
    return cleaning_output
Example #8
def glove_lstm_train(config):
    preprocessed_input = train_preprocessing(config)
    word_tokenizer, glove_embeddings = glove_preprocessing_train(
        config, preprocessed_input)
    glove_lstm = Step(
        name='glove_lstm',
        transformer=GloveLSTM(**config.glove_lstm_network),
        overwrite_transformer=True,
        input_steps=[word_tokenizer, preprocessed_input, glove_embeddings],
        adapter={
            'X': ([('word_tokenizer', 'X')]),
            'y': ([('xy_split', 'y')]),
            'embedding_matrix': ([('glove_embeddings', 'embeddings_matrix')]),
            'validation_data': ([('word_tokenizer', 'X_valid'),
                                 ('xy_split', 'validation_data')], join_valid),
        },
        cache_dirpath=config.env.cache_dirpath)
    glove_output = Step(name='output_glove',
                        transformer=Dummy(),
                        input_steps=[glove_lstm],
                        adapter={
                            'y_pred':
                            ([('glove_lstm', 'prediction_probability')]),
                        },
                        cache_dirpath=config.env.cache_dirpath)
    return glove_output
Example #9
def char_vdcnn_train(config):
    preprocessed_input = train_preprocessing(config)
    char_tokenizer = Step(name='char_tokenizer',
                          transformer=Tokenizer(**config.char_tokenizer),
                          input_steps=[preprocessed_input],
                          adapter={
                              'X': ([('xy_split', 'X')], fetch_x_train),
                              'X_valid': ([('xy_split', 'validation_data')],
                                          fetch_x_valid),
                              'train_mode': ([('xy_split', 'train_mode')])
                          },
                          cache_dirpath=config.env.cache_dirpath)
    network = Step(name='char_vdcnn',
                   transformer=CharVDCNN(**config.char_vdcnn_network),
                   overwrite_transformer=True,
                   input_steps=[char_tokenizer, preprocessed_input],
                   adapter={
                       'X': ([('char_tokenizer', 'X')]),
                       'y': ([('xy_split', 'y')]),
                       'validation_data':
                       ([('char_tokenizer', 'X_valid'),
                         ('xy_split', 'validation_data')], join_valid),
                   },
                   cache_dirpath=config.env.cache_dirpath)
    char_output = Step(name='char_output',
                       transformer=Dummy(),
                       input_steps=[network],
                       adapter={
                           'y_pred':
                           ([('char_vdcnn', 'prediction_probability')]),
                       },
                       cache_dirpath=config.env.cache_dirpath)
    return char_output
def _numerical_features(clean_features, config, train_mode, **kwargs):
    if train_mode:
        clean, clean_valid = clean_features
    else:
        clean = clean_features

    numerical_features = Step(name='numerical_features',
                              transformer=fe.ProcessNumerical(),
                              input_steps=[clean],
                              adapter={
                                  'numerical_features': (
                                      [(clean.name, 'clean_features')], partial(pandas_subset_columns,
                                                                                cols=cfg.NUMERICAL_COLUMNS))
                              },
                              cache_dirpath=config.env.cache_dirpath,
                              **kwargs)
    if train_mode:
        numerical_features_valid = Step(name='numerical_features_valid',
                                        transformer=numerical_features,
                                        input_steps=[clean_valid],
                                        adapter={'numerical_features': (
                                            [(clean_valid.name, 'clean_features')], partial(pandas_subset_columns,
                                                                                            cols=cfg.NUMERICAL_COLUMNS))
                                        },
                                        cache_dirpath=config.env.cache_dirpath, **kwargs)
        return numerical_features, numerical_features_valid
    else:
        return numerical_features
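
# pandas_subset_columns and cfg.NUMERICAL_COLUMNS come from elsewhere in the
# repository. A plausible sketch of the helper (an assumption): it receives the
# gathered results, here a single DataFrame, and keeps only the requested columns.
def pandas_subset_columns(inputs, cols):
    dataframe = inputs[0]
    return dataframe[cols]
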
def _groupby_aggregations(clean_features, additional_features, config, train_mode, **kwargs):
    if train_mode:
        clean, clean_valid = clean_features
        added_feature, added_feature_valid = additional_features
    else:
        clean = clean_features
        added_feature = additional_features

    groupby_aggregations = Step(name='groupby_aggregations',
                                transformer=fe.GroupbyAggregations(**config.groupby_aggregation),
                                input_steps=[clean, added_feature],
                                adapter={
                                    'X': ([(clean.name, 'clean_features'),
                                           (added_feature.name, 'categorical_features')],
                                          pandas_concat_inputs)
                                },
                                cache_dirpath=config.env.cache_dirpath, **kwargs)

    if train_mode:
        groupby_aggregations_valid = Step(name='groupby_aggregations_valid',
                                          transformer=groupby_aggregations,
                                          input_steps=[clean_valid, added_feature_valid],
                                          adapter={'X': ([(clean_valid.name, 'clean_features'),
                                                          (added_feature_valid.name, 'categorical_features')],
                                                         pandas_concat_inputs
                                                         )
                                                   },
                                          cache_dirpath=config.env.cache_dirpath, **kwargs)
        return groupby_aggregations, groupby_aggregations_valid
    else:
        return groupby_aggregations
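
# pandas_concat_inputs is another adapter helper that is not defined here.
# Presumably (an assumption) it concatenates the gathered DataFrames column-wise
# so the group-by aggregations see clean and categorical features side by side:
import pandas as pd

def pandas_concat_inputs(inputs):
    return pd.concat(inputs, axis=1)
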
def _timestamp_features(clean_features, config, train_mode, **kwargs):
    if train_mode:
        clean, clean_valid = clean_features
    else:
        clean = clean_features

    timestamp_features = Step(name='timestamp_features',
                              transformer=fe.DateFeatures(**config.date_features),
                              input_steps=[clean],
                              adapter={
                                  'timestamp_features': (
                                      [(clean.name, 'clean_features')], partial(pandas_subset_columns,
                                                                                cols=cfg.TIMESTAMP_COLUMNS))
                              },
                              cache_dirpath=config.env.cache_dirpath, **kwargs)
    if train_mode:
        timestamp_features_valid = Step(name='timestamp_features_valid',
                                        transformer=timestamp_features,
                                        input_steps=[clean_valid],
                                        adapter={'timestamp_features': (
                                            [(clean_valid.name, 'clean_features')], partial(pandas_subset_columns,
                                                                                            cols=cfg.TIMESTAMP_COLUMNS))
                                        },
                                        cache_dirpath=config.env.cache_dirpath, **kwargs)
        return timestamp_features, timestamp_features_valid
    else:
        return timestamp_features
def _encode_categorical(clean_features, config, train_mode, **kwargs):
    if train_mode:
        clean, clean_valid = clean_features
    else:
        clean = clean_features

    categorical_encoder = Step(name='categorical_encoder',
                               transformer=fe.OrdinalEncoder(**config.categorical_encoder),
                               input_steps=[clean],
                               adapter={
                                   'categorical_features': (
                                       [(clean.name, 'clean_features')], partial(pandas_subset_columns,
                                                                                 cols=cfg.CATEGORICAL_COLUMNS))
                               },
                               cache_dirpath=config.env.cache_dirpath, **kwargs)

    if train_mode:
        categorical_encoder_valid = Step(name='categorical_encoder_valid',
                                         transformer=categorical_encoder,
                                         input_steps=[clean_valid],
                                         adapter={'categorical_features': (
                                             [(clean_valid.name, 'clean_features')], partial(pandas_subset_columns,
                                                                                             cols=cfg.CATEGORICAL_COLUMNS))
                                         },
                                         cache_dirpath=config.env.cache_dirpath, **kwargs)
        return categorical_encoder, categorical_encoder_valid
    else:
        return categorical_encoder
def image_features(clean_features, config, train_mode, **kwargs):
    if train_mode:
        clean, clean_valid = clean_features
    else:
        clean = clean_features

    image_stats = Step(name='image_stats',
                       transformer=fe.ImageStatistics(**config.image_stats),
                       input_data=['specs'],
                       input_steps=[clean],
                       adapter={'X': ([(clean.name, 'clean_features')]),
                                'is_train': ([('specs', 'is_train')])},
                       cache_dirpath=config.env.cache_dirpath, **kwargs)

    if train_mode:
        image_stats_valid = Step(name='image_stats_valid',
                                 transformer=image_stats,
                                 input_data=['specs'],
                                 input_steps=[clean_valid],
                                 adapter={'X': ([(clean_valid.name, 'clean_features')]),
                                          'is_train': ([('specs', 'is_train')])},
                                 cache_dirpath=config.env.cache_dirpath, **kwargs)
        return image_stats, image_stats_valid
    else:
        return image_stats
Example #15
def bad_word_count_features_svm(config):
    preprocessed_input = inference_preprocessing(config)
    normalizer = count_features(config)
    xy_split = normalizer.get_step('xy_split')
    tfidf_word_vectorizer = bad_word_tfidf(preprocessed_input, config)

    svm_multi = Step(name='svm_multi',
                     transformer=LinearSVCMultilabel(**config.svc_multilabel),
                     input_steps=[xy_split, normalizer, tfidf_word_vectorizer],
                     adapter={
                         'X': ([('normalizer', 'X'),
                                ('bad_word_tfidf_word_vectorizer', 'features')
                                ], sparse_hstack_inputs),
                         'y': ([('xy_split', 'y')]),
                     },
                     cache_dirpath=config.env.cache_dirpath)

    svm_output = Step(name='svm_output',
                      transformer=Dummy(),
                      input_steps=[svm_multi],
                      adapter={
                          'y_pred':
                          ([('svm_multi', 'prediction_probability')]),
                      },
                      cache_dirpath=config.env.cache_dirpath)
    return svm_output
Example #16
def solution_1(config, train_mode):
    if train_mode:
        features, features_valid = feature_extraction(config,
                                                      train_mode,
                                                      save_output=True,
                                                      cache_output=True,
                                                      load_saved_output=True)
        light_gbm = classifier_lgbm((features, features_valid), config,
                                    train_mode)
    else:
        features = feature_extraction(config, train_mode, cache_output=True)
        light_gbm = classifier_lgbm(features, config, train_mode)

    clipper = Step(name='clipper',
                   transformer=Clipper(**config.clipper),
                   input_steps=[light_gbm],
                   adapter={
                       'prediction': ([(light_gbm.name, 'prediction')]),
                   },
                   cache_dirpath=config.env.cache_dirpath)

    output = Step(name='output',
                  transformer=Dummy(),
                  input_steps=[clipper],
                  adapter={
                      'y_pred': ([(clipper.name, 'clipped_prediction')]),
                  },
                  cache_dirpath=config.env.cache_dirpath)
    return output
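
# Clipper is configured via config.clipper but its implementation is not shown.
# Given the output name 'clipped_prediction', something like the following
# sketch would fit; the min_val/max_val parameters are assumptions:
import numpy as np

class Clipper:
    def __init__(self, min_val=0.0, max_val=1.0):
        self.min_val = min_val
        self.max_val = max_val

    def fit_transform(self, prediction):
        return self.transform(prediction)

    def transform(self, prediction):
        clipped = np.clip(prediction, self.min_val, self.max_val)
        return {'clipped_prediction': clipped}
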
Example #17
def hand_crafted_all_svm(config):
    xy_split, normalizer, char_vector, word_vector, bad_word_vector = hand_crafted_all(
        config)

    svm_multi = Step(name='svm_multi',
                     transformer=LinearSVCMultilabel(**config.svc_multilabel),
                     input_steps=[
                         xy_split, normalizer, char_vector, word_vector,
                         bad_word_vector
                     ],
                     adapter={
                         'X': ([('normalizer', 'X'),
                                ('tfidf_char_vectorizer', 'features'),
                                ('tfidf_word_vectorizer', 'features'),
                                ('bad_word_tfidf_word_vectorizer', 'features')
                                ], sparse_hstack_inputs),
                         'y': ([('xy_split', 'y')]),
                     },
                     cache_dirpath=config.env.cache_dirpath)

    svm_output = Step(name='svm_output',
                      transformer=Dummy(),
                      input_steps=[svm_multi],
                      adapter={
                          'y_pred':
                          ([('svm_multi', 'prediction_probability')]),
                      },
                      cache_dirpath=config.env.cache_dirpath)
    return svm_output
Example #18
def logistic_regression_ensemble_train(config):
    model_outputs = ensemble_extraction(config)
    output_mappings = [(output_step.name, 'prediction_probability')
                       for output_step in model_outputs]

    label = model_outputs[0].get_step('xy_train')

    input_steps = model_outputs + [label]

    logreg = Step(name='logreg_ensemble',
                  transformer=LogisticRegressionMultilabel(
                      **config.logistic_regression_ensemble),
                  overwrite_transformer=True,
                  input_steps=input_steps,
                  adapter={
                      'X': (output_mappings, hstack_inputs),
                      'y': ([('xy_train', 'y')])
                  },
                  cache_dirpath=config.env.cache_dirpath)

    logreg_ensemble_output = Step(
        name='logreg_ensemble_output',
        transformer=Dummy(),
        input_steps=[logreg],
        adapter={'y_pred': ([('logreg_ensemble', 'prediction_probability')])},
        cache_dirpath=config.env.cache_dirpath)
    return logreg_ensemble_output
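
# hstack_inputs is not defined in these snippets. With dense
# prediction_probability arrays gathered from each model output, a plausible
# implementation (an assumption) is a simple horizontal stack:
import numpy as np

def hstack_inputs(inputs):
    return np.hstack(inputs)
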
def word_lstm_train(config):
    preprocessed_input = train_preprocessing(config)
    word_tokenizer = Step(name='word_tokenizer',
                          transformer=Tokenizer(**config.word_tokenizer),
                          input_steps=[preprocessed_input],
                          adapter={'X': ([('cleaning_output', 'X')]),
                                   'X_valid': ([('cleaning_output', 'X_valid')]),
                                   'train_mode': ([('cleaning_output', 'train_mode')])
                                   },
                          cache_dirpath=config.env.cache_dirpath)

    word_lstm = Step(name='word_lstm',
                     transformer=WordLSTM(**config.word_lstm_network),
                     overwrite_transformer=True,
                     input_steps=[word_tokenizer, preprocessed_input],
                     adapter={'X': ([('word_tokenizer', 'X')]),
                              'y': ([('cleaning_output', 'y')]),
                              'validation_data': (
                                  [('word_tokenizer', 'X_valid'), ('cleaning_output', 'y_valid')], to_tuple_inputs),
                              },
                     cache_dirpath=config.env.cache_dirpath)
    word_output = Step(name='word_output',
                       transformer=Dummy(),
                       input_steps=[word_lstm],
                       adapter={'y_pred': ([('word_lstm', 'prediction_probability')]),
                                },
                       cache_dirpath=config.env.cache_dirpath)
    return word_output
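
# to_tuple_inputs is referenced above but not defined; it presumably just packs
# the gathered values (here X_valid and y_valid) into the (X, y) tuple that the
# network's fit expects as validation_data. A sketch (an assumption):
def to_tuple_inputs(inputs):
    return tuple(inputs)
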
Example #20
def random_forest_ensemble_train(config):
    model_outputs = ensemble_extraction(config)
    output_mappings = [(output_step.name, 'prediction_probability')
                       for output_step in model_outputs]

    label = model_outputs[0].get_step('xy_train')

    input_steps = model_outputs + [label]

    random_forest_ensemble = Step(
        name='random_forest_ensemble',
        transformer=RandomForestMultilabel(**config.random_forest_ensemble),
        overwrite_transformer=True,
        input_steps=input_steps,
        adapter={
            'X': (output_mappings, hstack_inputs),
            'y': ([('xy_train', 'y')])
        },
        cache_dirpath=config.env.cache_dirpath)

    random_forest_ensemble_output = Step(
        name='random_forest_ensemble_output',
        transformer=Dummy(),
        input_steps=[random_forest_ensemble],
        adapter={
            'y_pred': ([('random_forest_ensemble', 'prediction_probability')])
        },
        cache_dirpath=config.env.cache_dirpath)
    return random_forest_ensemble_output
Example #21
def _preprocessing_single_generator(config, is_train, use_patching):
    if use_patching:
        raise NotImplementedError
    else:
        if is_train:
            xy_train = Step(name='xy_train',
                            transformer=XYSplit(**config.xy_splitter),
                            input_data=['input'],
                            adapter={
                                'meta': ([('input', 'meta')]),
                                'train_mode': ([('input', 'train_mode')])
                            },
                            cache_dirpath=config.env.cache_dirpath)

            xy_inference = Step(name='xy_inference',
                                transformer=XYSplit(**config.xy_splitter),
                                input_data=['input'],
                                adapter={
                                    'meta': ([('input', 'meta_valid')]),
                                    'train_mode': ([('input', 'train_mode')])
                                },
                                cache_dirpath=config.env.cache_dirpath)

            loader = Step(name='loader',
                          transformer=loaders.MetadataImageSegmentationLoader(
                              **config.loader),
                          input_data=['input'],
                          input_steps=[xy_train, xy_inference],
                          adapter={
                              'X': ([('xy_train', 'X')], squeeze_inputs),
                              'y': ([('xy_train', 'y')], squeeze_inputs),
                              'train_mode': ([('input', 'train_mode')]),
                              'X_valid':
                              ([('xy_inference', 'X')], squeeze_inputs),
                              'y_valid':
                              ([('xy_inference', 'y')], squeeze_inputs),
                          },
                          cache_dirpath=config.env.cache_dirpath)
        else:
            xy_inference = Step(name='xy_inference',
                                transformer=XYSplit(**config.xy_splitter),
                                input_data=['input'],
                                adapter={
                                    'meta': ([('input', 'meta')]),
                                    'train_mode': ([('input', 'train_mode')])
                                },
                                cache_dirpath=config.env.cache_dirpath)

            loader = Step(name='loader',
                          transformer=loaders.MetadataImageSegmentationLoader(
                              **config.loader),
                          input_data=['input'],
                          input_steps=[xy_inference, xy_inference],
                          adapter={
                              'X': ([('xy_inference', 'X')], squeeze_inputs),
                              'y': ([('xy_inference', 'y')], squeeze_inputs),
                              'train_mode': ([('input', 'train_mode')]),
                          },
                          cache_dirpath=config.env.cache_dirpath)
    return loader
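
# squeeze_inputs is not shown either. The metadata splitter appears to hand the
# loader single-column frames, so a plausible helper (an assumption) squeezes
# the one gathered result down to a flat array:
import numpy as np

def squeeze_inputs(inputs):
    return np.squeeze(np.asarray(inputs[0]))
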
Example #22
def char_vdcnn_inference(config):
    preprocessed_input = inference_preprocessing(config)
    char_tokenizer = Step(name='char_tokenizer',
                          transformer=Tokenizer(**config.char_tokenizer),
                          input_steps=[preprocessed_input],
                          adapter={
                              'X': ([('cleaning_output', 'X')]),
                              'train_mode':
                              ([('cleaning_output', 'train_mode')])
                          },
                          cache_dirpath=config.env.cache_dirpath)
    network = Step(name='char_vdcnn',
                   transformer=CharVDCNN(**config.char_vdcnn_network),
                   input_steps=[char_tokenizer, preprocessed_input],
                   adapter={
                       'X': ([('char_tokenizer', 'X')]),
                       'y': ([('cleaning_output', 'y')]),
                   },
                   cache_dirpath=config.env.cache_dirpath)
    char_output = Step(name='char_output',
                       transformer=Dummy(),
                       input_steps=[network],
                       adapter={
                           'y_pred':
                           ([('char_vdcnn', 'prediction_probability')]),
                       },
                       cache_dirpath=config.env.cache_dirpath)
    return char_output
Example #23
def char_vdcnn(config, is_train):
    preprocessed_input = _preprocessing(config, is_train)
    char_tokenizer = _char_tokenizer(preprocessed_input, config, is_train)

    if is_train:
        network = Step(name='char_vdcnn',
                       transformer=CharVDCNN(**config.char_vdcnn_network),
                       overwrite_transformer=True,
                       input_steps=[char_tokenizer, preprocessed_input],
                       adapter={
                           'X': ([('char_tokenizer', 'X')]),
                           'y': ([('cleaning_output', 'y')]),
                           'validation_data':
                           ([('char_tokenizer', 'X_valid'),
                             ('cleaning_output', 'y_valid')], to_tuple_inputs),
                       },
                       cache_dirpath=config.env.cache_dirpath)
    else:
        network = Step(name='char_vdcnn',
                       transformer=CharVDCNN(**config.char_vdcnn_network),
                       input_steps=[char_tokenizer, preprocessed_input],
                       adapter={
                           'X': ([('char_tokenizer', 'X')]),
                           'y': ([('cleaning_output', 'y')]),
                       },
                       cache_dirpath=config.env.cache_dirpath)
    output = Step(name='char_vdcnn_output',
                  transformer=Dummy(),
                  input_steps=[network],
                  adapter={
                      'y_pred': ([('char_vdcnn', 'prediction_probability')]),
                  },
                  cache_dirpath=config.env.cache_dirpath)
    return output
Example #24
def word_lstm_inference(config):
    preprocessed_input = inference_preprocessing(config)
    word_tokenizer = Step(name='word_tokenizer',
                          transformer=Tokenizer(**config.word_tokenizer),
                          input_steps=[preprocessed_input],
                          adapter={
                              'X': ([('cleaning_output', 'X')]),
                              'train_mode':
                              ([('cleaning_output', 'train_mode')])
                          },
                          cache_dirpath=config.env.cache_dirpath)
    word_lstm = Step(name='word_lstm',
                     transformer=WordLSTM(**config.word_lstm_network),
                     input_steps=[word_tokenizer, preprocessed_input],
                     adapter={
                         'X': ([('word_tokenizer', 'X')]),
                         'y': ([('cleaning_output', 'y')]),
                     },
                     cache_dirpath=config.env.cache_dirpath)
    word_output = Step(name='word_output',
                       transformer=Dummy(),
                       input_steps=[word_lstm],
                       adapter={
                           'y_pred':
                           ([('word_lstm', 'prediction_probability')]),
                       },
                       cache_dirpath=config.env.cache_dirpath)
    return word_output
Example #25
def unet(config, train_mode):
    if train_mode:
        save_output = True
        load_saved_output = False
        preprocessing = preprocessing_train(config)
    else:
        save_output = True
        load_saved_output = False
        preprocessing = preprocessing_inference(config)

    unet = Step(name='unet',
                transformer=PyTorchUNet(**config.unet),
                input_steps=[preprocessing],
                cache_dirpath=config.env.cache_dirpath,
                save_output=save_output,
                load_saved_output=load_saved_output)

    mask_postprocessed = mask_postprocessing(unet,
                                             config,
                                             save_output=save_output)

    detached = nuclei_labeler(mask_postprocessed,
                              config,
                              save_output=save_output)

    output = Step(name='output',
                  transformer=Dummy(),
                  input_steps=[detached],
                  adapter={
                      'y_pred': ([(detached.name, 'labels')]),
                  },
                  cache_dirpath=config.env.cache_dirpath)
    return output
Example #26
def glove_dpcnn_train(config):
    preprocessed_input = train_preprocessing(config)
    word_tokenizer, glove_embeddings = glove_preprocessing_train(
        config, preprocessed_input)
    glove_dpcnn = Step(
        name='glove_dpcnn',
        transformer=GloveDPCNN(**config.glove_dpcnn_network),
        overwrite_transformer=True,
        input_steps=[word_tokenizer, preprocessed_input, glove_embeddings],
        adapter={
            'X': ([('word_tokenizer', 'X')]),
            'y': ([('cleaning_output', 'y')]),
            'embedding_matrix': ([('glove_embeddings', 'embeddings_matrix')]),
            'validation_data':
            ([('word_tokenizer', 'X_valid'),
              ('cleaning_output', 'y_valid')], to_tuple_inputs),
        },
        cache_dirpath=config.env.cache_dirpath)
    glove_output = Step(name='output_glove',
                        transformer=Dummy(),
                        input_steps=[glove_dpcnn],
                        adapter={
                            'y_pred':
                            ([('glove_dpcnn', 'prediction_probability')]),
                        },
                        cache_dirpath=config.env.cache_dirpath)
    return glove_output
Example #27
def unet_multitask(config, train_mode):
    if train_mode:
        save_output = True
        load_saved_output = False
        preprocessing = preprocessing_multitask_train(config)
    else:
        save_output = True
        load_saved_output = False
        preprocessing = preprocessing_multitask_inference(config)

    unet_multitask = Step(name='unet_multitask',
                          transformer=PyTorchUNetMultitask(**config.unet),
                          input_steps=[preprocessing],
                          cache_dirpath=config.env.cache_dirpath,
                          save_output=save_output,
                          load_saved_output=load_saved_output)

    mask_resize = Step(name='mask_resize',
                       transformer=Resizer(),
                       input_data=['input'],
                       input_steps=[unet_multitask],
                       adapter={
                           'images':
                           ([(unet_multitask.name, 'mask_prediction')]),
                           'target_sizes': ([('input', 'target_sizes')]),
                       },
                       cache_dirpath=config.env.cache_dirpath,
                       save_output=save_output)

    contour_resize = Step(name='contour_resize',
                          transformer=Resizer(),
                          input_data=['input'],
                          input_steps=[unet_multitask],
                          adapter={
                              'images':
                              ([(unet_multitask.name, 'contour_prediction')]),
                              'target_sizes': ([('input', 'target_sizes')]),
                          },
                          cache_dirpath=config.env.cache_dirpath,
                          save_output=save_output)

    detached = Step(name='detached',
                    transformer=Postprocessor(),
                    input_steps=[mask_resize, contour_resize],
                    adapter={
                        'images': ([(mask_resize.name, 'resized_images')]),
                        'contours':
                        ([(contour_resize.name, 'resized_images')]),
                    },
                    cache_dirpath=config.env.cache_dirpath,
                    save_output=save_output)

    output = Step(name='output',
                  transformer=Dummy(),
                  input_steps=[detached],
                  adapter={
                      'y_pred': ([(detached.name, 'labeled_images')]),
                  },
                  cache_dirpath=config.env.cache_dirpath)
    return output
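
# Postprocessor is not defined in this snippet. Given its inputs (resized mask
# and contour probability maps) and its output ('labeled_images'), a rough
# sketch of the idea; the thresholds and labeling scheme are assumptions:
import numpy as np
from scipy import ndimage

class Postprocessor:
    def transform(self, images, contours):
        labeled_images = []
        for mask, contour in zip(images, contours):
            # Keep mask interiors, suppress predicted contours, then label the
            # remaining connected components as separate instances.
            interior = (np.asarray(mask) > 0.5) & (np.asarray(contour) < 0.5)
            labels, _ = ndimage.label(interior)
            labeled_images.append(labels)
        return {'labeled_images': labeled_images}
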
Example #28
def tfidf_svm(config):
    preprocessed_input = inference_preprocessing(config)
    tfidf_char_vectorizer, tfidf_word_vectorizer = tfidf(
        preprocessed_input, config)

    svm_multi = Step(name='svm_multi',
                     transformer=LinearSVCMultilabel(**config.svc_multilabel),
                     input_steps=[
                         preprocessed_input, tfidf_char_vectorizer,
                         tfidf_word_vectorizer
                     ],
                     adapter={
                         'X': ([('tfidf_char_vectorizer', 'features'),
                                ('tfidf_word_vectorizer', 'features')],
                               sparse_hstack_inputs),
                         'y': ([('cleaning_output', 'y')]),
                     },
                     cache_dirpath=config.env.cache_dirpath)
    svm_output = Step(name='svm_output',
                      transformer=Dummy(),
                      input_steps=[svm_multi],
                      adapter={
                          'y_pred':
                          ([('svm_multi', 'prediction_probability')]),
                      },
                      cache_dirpath=config.env.cache_dirpath)
    return svm_output
Example #29
def _feature_by_type_splits(config, train_mode):
    if train_mode:
        feature_by_type_split = Step(name='feature_by_type_split',
                                     transformer=fe.DataFrameByTypeSplitter(
                                         **config.dataframe_by_type_splitter),
                                     input_data=['input'],
                                     adapter={
                                         'X': ([('input', 'X')]),
                                     },
                                     cache_dirpath=config.env.cache_dirpath)

        feature_by_type_split_valid = Step(
            name='feature_by_type_split_valid',
            transformer=feature_by_type_split,
            input_data=['input'],
            adapter={
                'X': ([('input', 'X_valid')]),
            },
            cache_dirpath=config.env.cache_dirpath)

        return feature_by_type_split, feature_by_type_split_valid

    else:
        feature_by_type_split = Step(name='feature_by_type_split',
                                     transformer=fe.DataFrameByTypeSplitter(
                                         **config.dataframe_by_type_splitter),
                                     input_data=['input'],
                                     adapter={
                                         'X': ([('input', 'X')]),
                                     },
                                     cache_dirpath=config.env.cache_dirpath)

        return feature_by_type_split
Example #30
def preprocessing_generator_padded_tta(config):
    xy_inference = Step(name='xy_inference',
                        transformer=XYSplit(**config.xy_splitter),
                        input_data=['input', 'specs'],
                        adapter={
                            'meta': ([('input', 'meta')]),
                            'train_mode': ([('specs', 'train_mode')])
                        },
                        cache_dirpath=config.env.cache_dirpath)

    tta_generator = Step(name='tta_generator',
                         transformer=loaders.TestTimeAugmentationGenerator(
                             **config.tta_generator),
                         input_steps=[xy_inference],
                         adapter={
                             'X': ([('xy_inference', 'X')]),
                         },
                         cache_dirpath=config.env.cache_dirpath)

    loader = Step(
        name='loader',
        transformer=loaders.ImageSegmentationLoaderInferencePaddingTTA(
            **config.loader),
        input_steps=[xy_inference, tta_generator],
        adapter={
            'X': ([(tta_generator.name, 'X_tta')], squeeze_inputs),
            'tta_params':
            ([(tta_generator.name, 'tta_params')], squeeze_inputs),
        },
        cache_dirpath=config.env.cache_dirpath)
    return loader, tta_generator