Exemplo n.º 1
0
def feature_extraction_v0(config, train_mode, **kwargs):
    """Build the v0 feature-extraction pipeline.

    Splits raw features by type, wraps the categorical part in a passthrough
    (Dummy) step and joins everything through ``_join_features``.

    Args:
        config: experiment configuration; ``config.env.cache_dirpath`` is the
            step cache location.
        train_mode: when truthy, a parallel validation branch is built too.
        **kwargs: forwarded to every created ``Step``.

    Returns:
        The feature-combiner step produced by ``_join_features``.
    """

    def _categorical_step(name, split_step):
        # Both branches previously duplicated this identical Step construction;
        # it re-exposes the categorical part of a feature-by-type split.
        return Step(name=name,
                    transformer=Dummy(),
                    input_steps=[split_step],
                    adapter={
                        'categorical_features':
                        [(split_step.name, 'categorical_features')],
                    },
                    cache_dirpath=config.env.cache_dirpath,
                    **kwargs)

    if train_mode:
        feature_by_type_split, feature_by_type_split_valid = _feature_by_type_splits(
            config, train_mode)
        categorical_features = _categorical_step('categorical_features',
                                                 feature_by_type_split)
        categorical_features_valid = _categorical_step(
            'categorical_features_valid', feature_by_type_split_valid)
        return _join_features(
            numerical_features=[],
            numerical_features_valid=[],
            categorical_features=[categorical_features],
            categorical_features_valid=[categorical_features_valid],
            config=config,
            train_mode=train_mode,
            **kwargs)
    else:
        feature_by_type_split = _feature_by_type_splits(config, train_mode)
        categorical_features = _categorical_step('categorical_features',
                                                 feature_by_type_split)
        return _join_features(
            numerical_features=[],
            numerical_features_valid=[],
            categorical_features=[categorical_features],
            categorical_features_valid=[],
            config=config,
            train_mode=train_mode,
            **kwargs)
Exemplo n.º 2
0
def char_vdcnn_train(config):
    """Wire up the character-level VDCNN training pipeline."""
    cache_dir = config.env.cache_dirpath
    preprocessed_input = train_preprocessing(config)

    tokenizer_adapter = {
        'X': ([('xy_split', 'X')], fetch_x_train),
        'X_valid': ([('xy_split', 'validation_data')], fetch_x_valid),
        'train_mode': [('xy_split', 'train_mode')],
    }
    char_tokenizer = Step(name='char_tokenizer',
                          transformer=Tokenizer(**config.char_tokenizer),
                          input_steps=[preprocessed_input],
                          adapter=tokenizer_adapter,
                          cache_dirpath=cache_dir)

    network_adapter = {
        'X': [('char_tokenizer', 'X')],
        'y': [('xy_split', 'y')],
        'validation_data': ([('char_tokenizer', 'X_valid'),
                             ('xy_split', 'validation_data')], join_valid),
    }
    network = Step(name='char_vdcnn',
                   transformer=CharVDCNN(**config.char_vdcnn_network),
                   overwrite_transformer=True,
                   input_steps=[char_tokenizer, preprocessed_input],
                   adapter=network_adapter,
                   cache_dirpath=cache_dir)

    # Passthrough step exposing the network predictions as 'y_pred'.
    return Step(name='char_output',
                transformer=Dummy(),
                input_steps=[network],
                adapter={'y_pred': [('char_vdcnn', 'prediction_probability')]},
                cache_dirpath=cache_dir)
Exemplo n.º 3
0
def glove_lstm_train(config):
    """Wire up the GloVe-embedding LSTM training pipeline."""
    cache_dir = config.env.cache_dirpath
    preprocessed_input = train_preprocessing(config)
    word_tokenizer, glove_embeddings = glove_preprocessing_train(
        config, preprocessed_input)

    lstm_adapter = {
        'X': [('word_tokenizer', 'X')],
        'y': [('xy_split', 'y')],
        'embedding_matrix': [('glove_embeddings', 'embeddings_matrix')],
        'validation_data': ([('word_tokenizer', 'X_valid'),
                             ('xy_split', 'validation_data')], join_valid),
    }
    glove_lstm = Step(name='glove_lstm',
                      transformer=GloveLSTM(**config.glove_lstm_network),
                      overwrite_transformer=True,
                      input_steps=[word_tokenizer, preprocessed_input,
                                   glove_embeddings],
                      adapter=lstm_adapter,
                      cache_dirpath=cache_dir)

    # Passthrough step exposing the network predictions as 'y_pred'.
    return Step(name='output_glove',
                transformer=Dummy(),
                input_steps=[glove_lstm],
                adapter={'y_pred': [('glove_lstm', 'prediction_probability')]},
                cache_dirpath=cache_dir)
def unet(config, train_mode):
    """Build the single-model U-Net pipeline with multiclass object labeling.

    Args:
        config: experiment configuration (``config.unet``,
            ``config.env.cache_dirpath``).
        train_mode: forwarded to ``preprocessing`` as ``is_train``.

    Returns:
        Output step exposing labeled images under ``y_pred``.
    """
    # FIX: both branches of the original train/inference switch assigned the
    # exact same values, so the conditional was dead code; plain assignments
    # match the flat form used by the other unet variants in this file.
    save_output = False
    load_saved_output = False

    loader = preprocessing(config, model_type='single', is_train=train_mode)
    unet = Step(name='unet',
                transformer=PyTorchUNet(**config.unet),
                input_steps=[loader],
                cache_dirpath=config.env.cache_dirpath,
                save_output=save_output,
                load_saved_output=load_saved_output)

    mask_postprocessed = mask_postprocessing(unet,
                                             config,
                                             save_output=save_output)
    detached = multiclass_object_labeler(mask_postprocessed,
                                         config,
                                         save_output=save_output)
    output = Step(name='output',
                  transformer=Dummy(),
                  input_steps=[detached],
                  adapter={
                      'y_pred': ([(detached.name, 'labeled_images')]),
                  },
                  cache_dirpath=config.env.cache_dirpath,
                  save_output=save_output,
                  load_saved_output=False)
    return output
Exemplo n.º 5
0
def solution_1(config, train_mode):
    """Assemble the LightGBM solution: features -> LGBM -> clipper -> output."""
    cache_dir = config.env.cache_dirpath

    if train_mode:
        features, features_valid = feature_extraction(config,
                                                      train_mode,
                                                      save_output=True,
                                                      cache_output=True,
                                                      load_saved_output=True)
        light_gbm = classifier_lgbm((features, features_valid), config,
                                    train_mode)
    else:
        features = feature_extraction(config, train_mode, cache_output=True)
        light_gbm = classifier_lgbm(features, config, train_mode)

    # Clamp raw model predictions into the configured range.
    clipper = Step(name='clipper',
                   transformer=Clipper(**config.clipper),
                   input_steps=[light_gbm],
                   adapter={'prediction': [(light_gbm.name, 'prediction')]},
                   cache_dirpath=cache_dir)

    return Step(name='output',
                transformer=Dummy(),
                input_steps=[clipper],
                adapter={'y_pred': [(clipper.name, 'clipped_prediction')]},
                cache_dirpath=cache_dir)
Exemplo n.º 6
0
def unet(config, train_mode):
    """Build the (optionally streaming) single U-Net pipeline.

    Args:
        config: experiment configuration; ``config.execution.stream_mode``
            selects the streaming transformer implementation.
        train_mode: forwarded to ``preprocessing`` as ``is_train``.

    Returns:
        Output step exposing postprocessed images and their scores.
    """
    # FIX: both branches of the original train/inference switch assigned the
    # exact same values, so the conditional was dead code; use plain
    # assignments instead.
    save_output = False
    load_saved_output = False

    loader = preprocessing(config, model_type='single', is_train=train_mode)
    unet = Step(name='unet',
                transformer=PyTorchUNetStream(**config.unet)
                if config.execution.stream_mode else PyTorchUNet(**config.unet),
                input_steps=[loader],
                cache_dirpath=config.env.cache_dirpath,
                save_output=save_output,
                load_saved_output=load_saved_output)

    mask_postprocessed = mask_postprocessing(loader, unet, config,
                                             save_output=save_output)

    output = Step(name='output',
                  transformer=Dummy(),
                  input_steps=[mask_postprocessed],
                  adapter={
                      'y_pred': ([(mask_postprocessed.name, 'images')]),
                      'y_scores': ([(mask_postprocessed.name, 'scores')]),
                  },
                  cache_dirpath=config.env.cache_dirpath,
                  save_output=save_output,
                  load_saved_output=False)
    return output
Exemplo n.º 7
0
def bad_word_count_features_svm(config):
    """SVM on count features stacked with bad-word tf-idf features."""
    cache_dir = config.env.cache_dirpath
    preprocessed_input = inference_preprocessing(config)
    normalizer = count_features(config)
    xy_split = normalizer.get_step('xy_split')
    tfidf_word_vectorizer = bad_word_tfidf(preprocessed_input, config)

    svm_adapter = {
        'X': ([('normalizer', 'X'),
               ('bad_word_tfidf_word_vectorizer', 'features')],
              sparse_hstack_inputs),
        'y': [('xy_split', 'y')],
    }
    svm_multi = Step(name='svm_multi',
                     transformer=LinearSVCMultilabel(**config.svc_multilabel),
                     input_steps=[xy_split, normalizer, tfidf_word_vectorizer],
                     adapter=svm_adapter,
                     cache_dirpath=cache_dir)

    return Step(name='svm_output',
                transformer=Dummy(),
                input_steps=[svm_multi],
                adapter={'y_pred': [('svm_multi', 'prediction_probability')]},
                cache_dirpath=cache_dir)
Exemplo n.º 8
0
def glove_dpcnn_train(config):
    """Wire up the GloVe-embedding DPCNN training pipeline."""
    cache_dir = config.env.cache_dirpath
    preprocessed_input = train_preprocessing(config)
    word_tokenizer, glove_embeddings = glove_preprocessing_train(
        config, preprocessed_input)

    dpcnn_adapter = {
        'X': [('word_tokenizer', 'X')],
        'y': [('cleaning_output', 'y')],
        'embedding_matrix': [('glove_embeddings', 'embeddings_matrix')],
        'validation_data': ([('word_tokenizer', 'X_valid'),
                             ('cleaning_output', 'y_valid')], to_tuple_inputs),
    }
    glove_dpcnn = Step(name='glove_dpcnn',
                       transformer=GloveDPCNN(**config.glove_dpcnn_network),
                       overwrite_transformer=True,
                       input_steps=[word_tokenizer, preprocessed_input,
                                    glove_embeddings],
                       adapter=dpcnn_adapter,
                       cache_dirpath=cache_dir)

    # Passthrough step exposing the network predictions as 'y_pred'.
    return Step(name='output_glove',
                transformer=Dummy(),
                input_steps=[glove_dpcnn],
                adapter={'y_pred': [('glove_dpcnn', 'prediction_probability')]},
                cache_dirpath=cache_dir)
Exemplo n.º 9
0
def tfidf_svm(config):
    """Linear SVM on stacked char- and word-level tf-idf features.

    Args:
        config: experiment configuration (``config.svc_multilabel``,
            ``config.env.cache_dirpath``).

    Returns:
        Output step exposing prediction probabilities under ``y_pred``.
    """
    preprocessed_input = inference_preprocessing(config)
    tfidf_char_vectorizer, tfidf_word_vectorizer = tfidf(
        preprocessed_input, config)

    svm_multi = Step(name='svm_multi',
                     transformer=LinearSVCMultilabel(**config.svc_multilabel),
                     input_steps=[
                         preprocessed_input, tfidf_char_vectorizer,
                         tfidf_word_vectorizer
                     ],
                     adapter={
                         'X': ([('tfidf_char_vectorizer', 'features'),
                                ('tfidf_word_vectorizer', 'features')],
                               sparse_hstack_inputs),
                         'y': ([('cleaning_output', 'y')]),
                     },
                     cache_dirpath=config.env.cache_dirpath)
    svm_output = Step(name='svm_output',
                      transformer=Dummy(),
                      input_steps=[svm_multi],
                      adapter={
                          # BUG FIX: this adapter referenced 'logreg_multi'
                          # (copy-paste from the logreg pipeline) although the
                          # only input step here is named 'svm_multi'.
                          'y_pred':
                          ([('svm_multi', 'prediction_probability')]),
                      },
                      cache_dirpath=config.env.cache_dirpath)
    return svm_output
Exemplo n.º 10
0
def word_lstm_inference(config):
    """Wire up the word-level LSTM inference pipeline."""
    cache_dir = config.env.cache_dirpath
    preprocessed_input = inference_preprocessing(config)

    word_tokenizer = Step(name='word_tokenizer',
                          transformer=Tokenizer(**config.word_tokenizer),
                          input_steps=[preprocessed_input],
                          adapter={
                              'X': [('cleaning_output', 'X')],
                              'train_mode': [('cleaning_output', 'train_mode')],
                          },
                          cache_dirpath=cache_dir)

    word_lstm = Step(name='word_lstm',
                     transformer=WordLSTM(**config.word_lstm_network),
                     input_steps=[word_tokenizer, preprocessed_input],
                     adapter={
                         'X': [('word_tokenizer', 'X')],
                         'y': [('cleaning_output', 'y')],
                     },
                     cache_dirpath=cache_dir)

    return Step(name='word_output',
                transformer=Dummy(),
                input_steps=[word_lstm],
                adapter={'y_pred': [('word_lstm', 'prediction_probability')]},
                cache_dirpath=cache_dir)
Exemplo n.º 11
0
def char_vdcnn_inference(config):
    """Wire up the character-level VDCNN inference pipeline."""
    cache_dir = config.env.cache_dirpath
    preprocessed_input = inference_preprocessing(config)

    char_tokenizer = Step(name='char_tokenizer',
                          transformer=Tokenizer(**config.char_tokenizer),
                          input_steps=[preprocessed_input],
                          adapter={
                              'X': [('cleaning_output', 'X')],
                              'train_mode': [('cleaning_output', 'train_mode')],
                          },
                          cache_dirpath=cache_dir)

    network = Step(name='char_vdcnn',
                   transformer=CharVDCNN(**config.char_vdcnn_network),
                   input_steps=[char_tokenizer, preprocessed_input],
                   adapter={
                       'X': [('char_tokenizer', 'X')],
                       'y': [('cleaning_output', 'y')],
                   },
                   cache_dirpath=cache_dir)

    return Step(name='char_output',
                transformer=Dummy(),
                input_steps=[network],
                adapter={'y_pred': [('char_vdcnn', 'prediction_probability')]},
                cache_dirpath=cache_dir)
Exemplo n.º 12
0
def random_forest_ensemble_train(config):
    """Train a random-forest ensemble on the stacked base-model predictions."""
    cache_dir = config.env.cache_dirpath
    model_outputs = ensemble_extraction(config)
    output_mappings = [(step.name, 'prediction_probability')
                       for step in model_outputs]

    # Labels come from the shared xy_train step of the first base pipeline.
    label = model_outputs[0].get_step('xy_train')

    random_forest_ensemble = Step(
        name='random_forest_ensemble',
        transformer=RandomForestMultilabel(**config.random_forest_ensemble),
        overwrite_transformer=True,
        input_steps=model_outputs + [label],
        adapter={
            'X': (output_mappings, hstack_inputs),
            'y': [('xy_train', 'y')],
        },
        cache_dirpath=cache_dir)

    return Step(
        name='random_forest_ensemble_output',
        transformer=Dummy(),
        input_steps=[random_forest_ensemble],
        adapter={
            'y_pred': [('random_forest_ensemble', 'prediction_probability')],
        },
        cache_dirpath=cache_dir)
Exemplo n.º 13
0
def hand_crafted_all_svm(config):
    """Linear SVM over the union of hand-crafted count and tf-idf features.

    Args:
        config: experiment configuration (``config.svc_multilabel``,
            ``config.env.cache_dirpath``).

    Returns:
        Output step exposing prediction probabilities under ``y_pred``.
    """
    xy_split, normalizer, char_vector, word_vector, bad_word_vector = hand_crafted_all(
        config)

    svm_multi = Step(name='svm_multi',
                     transformer=LinearSVCMultilabel(**config.svc_multilabel),
                     input_steps=[
                         xy_split, normalizer, char_vector, word_vector,
                         bad_word_vector
                     ],
                     adapter={
                         'X': ([('normalizer', 'X'),
                                ('tfidf_char_vectorizer', 'features'),
                                ('tfidf_word_vectorizer', 'features'),
                                ('bad_word_tfidf_word_vectorizer', 'features')
                                ], sparse_hstack_inputs),
                         'y': ([('xy_split', 'y')]),
                     },
                     cache_dirpath=config.env.cache_dirpath)

    svm_output = Step(name='svm_output',
                      transformer=Dummy(),
                      input_steps=[svm_multi],
                      adapter={
                          # BUG FIX: this adapter referenced 'logreg_multi'
                          # (copy-paste from the logreg pipeline) although the
                          # only input step here is named 'svm_multi'.
                          'y_pred':
                          ([('svm_multi', 'prediction_probability')]),
                      },
                      cache_dirpath=config.env.cache_dirpath)
    return svm_output
Exemplo n.º 14
0
def tfidf_logreg(config):
    """Logistic regression on stacked char- and word-level tf-idf features."""
    cache_dir = config.env.cache_dirpath
    preprocessed_input = _preprocessing(config, is_train=False)
    tfidf_char_vectorizer, tfidf_word_vectorizer = _tfidf(
        preprocessed_input, config)

    logreg_adapter = {
        'X': ([('tfidf_char_vectorizer', 'features'),
               ('tfidf_word_vectorizer', 'features')], sparse_hstack_inputs),
        'y': [('cleaning_output', 'y')],
    }
    tfidf_logreg = Step(name='tfidf_logreg',
                        transformer=LogisticRegressionMultilabel(
                            **config.logistic_regression_multilabel),
                        input_steps=[preprocessed_input, tfidf_char_vectorizer,
                                     tfidf_word_vectorizer],
                        adapter=logreg_adapter,
                        cache_dirpath=cache_dir)

    return Step(name='tfidf_logreg_output',
                transformer=Dummy(),
                input_steps=[tfidf_logreg],
                adapter={
                    'y_pred': [('tfidf_logreg', 'prediction_probability')],
                },
                cache_dirpath=cache_dir)
Exemplo n.º 15
0
def unet(config, train_mode):
    """Build the (optionally streaming) U-Net pipeline with scored outputs."""
    save_output = False
    load_saved_output = False
    cache_dir = config.env.cache_dirpath

    loader = preprocessing_generator(config, is_train=train_mode)

    # Stream mode selects the streaming transformer implementation.
    if config.execution.stream_mode:
        transformer = PyTorchUNetStream(**config.unet)
    else:
        transformer = PyTorchUNet(**config.unet)

    unet = Step(name='unet',
                transformer=transformer,
                input_data=['callback_input'],
                input_steps=[loader],
                cache_dirpath=cache_dir,
                save_output=save_output,
                load_saved_output=load_saved_output)

    mask_postprocessed = mask_postprocessing(loader, unet, config,
                                             save_output=save_output)

    return Step(name='output',
                transformer=Dummy(),
                input_steps=[mask_postprocessed],
                adapter={
                    'y_pred': [(mask_postprocessed.name, 'images_with_scores')],
                },
                cache_dirpath=cache_dir,
                save_output=save_output,
                load_saved_output=False)
Exemplo n.º 16
0
def unet(config, train_mode):
    """Build the U-Net pipeline followed by nuclei labeling."""
    # The output flags are identical for train and inference; only the
    # preprocessing graph differs between the two modes.
    save_output = True
    load_saved_output = False
    preprocessing = (preprocessing_train(config)
                     if train_mode else preprocessing_inference(config))
    cache_dir = config.env.cache_dirpath

    unet = Step(name='unet',
                transformer=PyTorchUNet(**config.unet),
                input_steps=[preprocessing],
                cache_dirpath=cache_dir,
                save_output=save_output,
                load_saved_output=load_saved_output)

    mask_postprocessed = mask_postprocessing(unet, config,
                                             save_output=save_output)

    detached = nuclei_labeler(mask_postprocessed, config,
                              save_output=save_output)

    return Step(name='output',
                transformer=Dummy(),
                input_steps=[detached],
                adapter={'y_pred': [(detached.name, 'labels')]},
                cache_dirpath=cache_dir)
Exemplo n.º 17
0
def unet_multitask(config, train_mode):
    """Build the multitask U-Net pipeline (mask + contour prediction heads).

    Args:
        config: experiment configuration (``config.unet``,
            ``config.env.cache_dirpath``).
        train_mode: selects train vs. inference preprocessing.

    Returns:
        Output step exposing labeled images under ``y_pred``.
    """
    if train_mode:
        save_output = True
        load_saved_output = False
        preprocessing = preprocessing_multitask_train(config)
    else:
        save_output = True
        load_saved_output = False
        preprocessing = preprocessing_multitask_inference(config)

    unet_multitask = Step(name='unet_multitask',
                          transformer=PyTorchUNetMultitask(**config.unet),
                          input_steps=[preprocessing],
                          cache_dirpath=config.env.cache_dirpath,
                          save_output=save_output,
                          load_saved_output=load_saved_output)

    def _resize_step(name, prediction_key):
        # The mask and contour resize steps were duplicated verbatim except
        # for their name and the prediction key they read; build both here.
        return Step(name=name,
                    transformer=Resizer(),
                    input_data=['input'],
                    input_steps=[unet_multitask],
                    adapter={
                        'images': [(unet_multitask.name, prediction_key)],
                        'target_sizes': [('input', 'target_sizes')],
                    },
                    cache_dirpath=config.env.cache_dirpath,
                    save_output=save_output)

    mask_resize = _resize_step('mask_resize', 'mask_prediction')
    contour_resize = _resize_step('contour_resize', 'contour_prediction')

    # Combine the resized mask and contour heads into labeled instances.
    detached = Step(name='detached',
                    transformer=Postprocessor(),
                    input_steps=[mask_resize, contour_resize],
                    adapter={
                        'images': [(mask_resize.name, 'resized_images')],
                        'contours': [(contour_resize.name, 'resized_images')],
                    },
                    cache_dirpath=config.env.cache_dirpath,
                    save_output=save_output)

    output = Step(name='output',
                  transformer=Dummy(),
                  input_steps=[detached],
                  adapter={
                      'y_pred': [(detached.name, 'labeled_images')],
                  },
                  cache_dirpath=config.env.cache_dirpath)
    return output
Exemplo n.º 18
0
def bad_word_count_features_logreg(config):
    """Logistic regression on count features plus bad-word tf-idf features."""
    cache_dir = config.env.cache_dirpath
    preprocessed_input = _preprocessing(config, is_train=False)
    normalizer = _count_features(config)
    xy_split = normalizer.get_step('xy_split')
    tfidf_word_vectorizer = _bad_word_tfidf(preprocessed_input, config)

    logreg_adapter = {
        'X': ([('normalizer', 'X'),
               ('bad_word_tfidf_word_vectorizer', 'features')],
              sparse_hstack_inputs),
        'y': [('xy_split', 'y')],
    }
    bad_word_count_logreg = Step(
        name='bad_word_count_logreg',
        transformer=LogisticRegressionMultilabel(
            **config.logistic_regression_multilabel),
        input_steps=[xy_split, normalizer, tfidf_word_vectorizer],
        adapter=logreg_adapter,
        cache_dirpath=cache_dir)

    return Step(name='bad_word_count_features_logreg_output',
                transformer=Dummy(),
                input_steps=[bad_word_count_logreg],
                adapter={
                    'y_pred':
                    [('bad_word_count_logreg', 'prediction_probability')],
                },
                cache_dirpath=cache_dir)
Exemplo n.º 19
0
def char_vdcnn(config, is_train):
    """Build the character-level VDCNN pipeline for training or inference.

    Args:
        config: experiment configuration.
        is_train: when truthy, validation data is wired into the network step
            and its transformer is overwritten (retrained from scratch).

    Returns:
        Output step exposing prediction probabilities under ``y_pred``.
    """
    preprocessed_input = _preprocessing(config, is_train)
    char_tokenizer = _char_tokenizer(preprocessed_input, config, is_train)

    # The two branches previously duplicated the entire Step construction;
    # they differ only in the validation wiring and overwrite_transformer.
    adapter = {
        'X': ([('char_tokenizer', 'X')]),
        'y': ([('cleaning_output', 'y')]),
    }
    extra_kwargs = {}
    if is_train:
        adapter['validation_data'] = ([('char_tokenizer', 'X_valid'),
                                       ('cleaning_output', 'y_valid')],
                                      to_tuple_inputs)
        extra_kwargs['overwrite_transformer'] = True

    network = Step(name='char_vdcnn',
                   transformer=CharVDCNN(**config.char_vdcnn_network),
                   input_steps=[char_tokenizer, preprocessed_input],
                   adapter=adapter,
                   cache_dirpath=config.env.cache_dirpath,
                   **extra_kwargs)

    output = Step(name='char_vdcnn_output',
                  transformer=Dummy(),
                  input_steps=[network],
                  adapter={
                      'y_pred': ([('char_vdcnn', 'prediction_probability')]),
                  },
                  cache_dirpath=config.env.cache_dirpath)
    return output
Exemplo n.º 20
0
def logistic_regression_ensemble_train(config):
    """Train a logistic-regression ensemble on stacked base-model predictions."""
    cache_dir = config.env.cache_dirpath
    model_outputs = ensemble_extraction(config)
    output_mappings = [(step.name, 'prediction_probability')
                       for step in model_outputs]

    # Labels come from the shared xy_train step of the first base pipeline.
    label = model_outputs[0].get_step('xy_train')

    logreg = Step(name='logreg_ensemble',
                  transformer=LogisticRegressionMultilabel(
                      **config.logistic_regression_ensemble),
                  overwrite_transformer=True,
                  input_steps=model_outputs + [label],
                  adapter={
                      'X': (output_mappings, hstack_inputs),
                      'y': [('xy_train', 'y')],
                  },
                  cache_dirpath=cache_dir)

    return Step(name='logreg_ensemble_output',
                transformer=Dummy(),
                input_steps=[logreg],
                adapter={
                    'y_pred': [('logreg_ensemble', 'prediction_probability')],
                },
                cache_dirpath=cache_dir)
def inference_preprocessing(config):
    """Split meta into X/y, clean the text, and expose a combined output step."""
    cache_dir = config.env.cache_dirpath

    xy_train = Step(name='xy_train',
                    transformer=XYSplit(**config.xy_splitter),
                    input_data=['input'],
                    adapter={
                        'meta': [('input', 'meta')],
                        'train_mode': [('input', 'train_mode')],
                    },
                    cache_dirpath=cache_dir)

    text_cleaner = Step(name='text_cleaner_train',
                        transformer=TextCleaner(**config.text_cleaner),
                        input_steps=[xy_train],
                        adapter={'X': [('xy_train', 'X')]},
                        cache_dirpath=cache_dir)

    # Re-expose cleaned X, labels, and the train-mode flag under one step.
    return Step(name='cleaning_output',
                transformer=Dummy(),
                input_data=['input'],
                input_steps=[xy_train, text_cleaner],
                adapter={
                    'X': [('text_cleaner_train', 'X')],
                    'y': [('xy_train', 'y')],
                    'train_mode': [('input', 'train_mode')],
                },
                cache_dirpath=cache_dir)
def word_lstm_train(config):
    """Wire up the word-level LSTM training pipeline."""
    cache_dir = config.env.cache_dirpath
    preprocessed_input = train_preprocessing(config)

    word_tokenizer = Step(name='word_tokenizer',
                          transformer=Tokenizer(**config.word_tokenizer),
                          input_steps=[preprocessed_input],
                          adapter={
                              'X': [('cleaning_output', 'X')],
                              'X_valid': [('cleaning_output', 'X_valid')],
                              'train_mode': [('cleaning_output', 'train_mode')],
                          },
                          cache_dirpath=cache_dir)

    word_lstm = Step(name='word_lstm',
                     transformer=WordLSTM(**config.word_lstm_network),
                     overwrite_transformer=True,
                     input_steps=[word_tokenizer, preprocessed_input],
                     adapter={
                         'X': [('word_tokenizer', 'X')],
                         'y': [('cleaning_output', 'y')],
                         'validation_data':
                         ([('word_tokenizer', 'X_valid'),
                           ('cleaning_output', 'y_valid')], to_tuple_inputs),
                     },
                     cache_dirpath=cache_dir)

    return Step(name='word_output',
                transformer=Dummy(),
                input_steps=[word_lstm],
                adapter={'y_pred': [('word_lstm', 'prediction_probability')]},
                cache_dirpath=cache_dir)
Exemplo n.º 23
0
def seq_conv_train(config):
    """Training pipeline for the sequential conv-net segmentation model.

    Splits metadata into train/validation X-y pairs, loads images, trains the
    conv net, resizes predicted masks to target sizes, thresholds them, and
    exposes the binarized masks as ``y_pred``.
    """
    cache_dir = config.env.cache_dirpath

    split_train = Step(name='xy_train',
                       transformer=XYSplit(**config.xy_splitter),
                       input_data=['input'],
                       adapter={'meta': ([('input', 'meta')]),
                                'train_mode': ([('input', 'train_mode')])},
                       cache_dirpath=cache_dir)

    split_valid = Step(name='xy_inference',
                       transformer=XYSplit(**config.xy_splitter),
                       input_data=['input'],
                       adapter={'meta': ([('input', 'meta_valid')]),
                                'train_mode': ([('input', 'train_mode')])},
                       cache_dirpath=cache_dir)

    loader_step = Step(name='loader',
                       transformer=MetadataImageSegmentationLoader(**config.loader),
                       input_data=['input'],
                       input_steps=[split_train, split_valid],
                       adapter={'X': ([('xy_train', 'X')], squeeze_inputs),
                                'y': ([('xy_train', 'y')], squeeze_inputs),
                                'train_mode': ([('input', 'train_mode')]),
                                'X_valid': ([('xy_inference', 'X')], squeeze_inputs),
                                'y_valid': ([('xy_inference', 'y')], squeeze_inputs)},
                       cache_dirpath=cache_dir)

    convnet = Step(name='sequential_convnet',
                   transformer=SequentialConvNet(**config.sequential_convnet),
                   input_steps=[loader_step],
                   cache_dirpath=cache_dir)

    resized = Step(name='mask_resize',
                   transformer=Resizer(),
                   input_data=['input'],
                   input_steps=[convnet],
                   adapter={'images': ([('sequential_convnet', 'predicted_masks')]),
                            'target_sizes': ([('input', 'target_sizes')])},
                   cache_dirpath=cache_dir)

    binarized = Step(name='thresholding',
                     transformer=Thresholder(**config.thresholder),
                     input_steps=[resized],
                     adapter={'images': ([('mask_resize', 'resized_images')])},
                     cache_dirpath=cache_dir)

    return Step(name='output',
                transformer=Dummy(),
                input_steps=[binarized],
                adapter={'y_pred': ([('thresholding', 'binarized_images')])},
                cache_dirpath=cache_dir)
# Example no. 24
def unet_padded(config):
    """Inference pipeline: U-Net on padded inputs, predictions cropped back.

    Reuses the inference 'unet' step but swaps its loader for the
    padding-aware variant; predictions are cropped to the original extent,
    renamed, postprocessed into scored masks, and returned as ``y_pred``.
    """
    save_output = False
    cache_dir = config.env.cache_dirpath

    unet_step = unet(config, train_mode=False).get_step('unet')

    # Replace the loader transformer so inputs are padded before the network.
    loader = unet_step.get_step("loader")
    loader.transformer = loaders.ImageSegmentationLoaderInferencePadding(
        **config.loader)

    if config.execution.stream_mode:
        cropper = post.PredictionCropStream(**config.postprocessor.prediction_crop)
    else:
        cropper = post.PredictionCrop(**config.postprocessor.prediction_crop)

    prediction_crop = Step(name='prediction_crop',
                           transformer=cropper,
                           input_steps=[unet_step],
                           adapter={'images': ([(unet_step.name, 'multichannel_map_prediction')])},
                           cache_dirpath=cache_dir,
                           save_output=save_output)

    # Re-expose the cropped images under the key downstream postprocessing expects.
    prediction_renamed = Step(name='prediction_renamed',
                              transformer=Dummy(),
                              input_steps=[prediction_crop],
                              adapter={'multichannel_map_prediction':
                                       ([(prediction_crop.name, 'cropped_images')])},
                              cache_dirpath=cache_dir,
                              save_output=save_output)

    mask_postprocessed = mask_postprocessing(loader,
                                             prediction_renamed,
                                             config,
                                             save_output=save_output)

    return Step(name='output',
                transformer=Dummy(),
                input_steps=[mask_postprocessed],
                adapter={'y_pred': ([(mask_postprocessed.name, 'images_with_scores')])},
                cache_dirpath=cache_dir,
                save_output=save_output)
# Example no. 25
def _price_features(dispatchers, config, train_mode, **kwargs):
    """Expose numerical (price) features from the type-split dispatcher(s).

    In train mode ``dispatchers`` is a (train, valid) pair and a
    (train_step, valid_step) pair is returned, with the valid step reusing
    the train step as its transformer; otherwise a single step is returned.
    """
    cache_dir = config.env.cache_dirpath

    def _build(step_name, dispatcher, transformer):
        # One price-feature step wired to a single dispatcher.
        return Step(name=step_name,
                    transformer=transformer,
                    input_steps=[dispatcher],
                    adapter={'numerical_features':
                             ([(dispatcher.name, 'numerical_features')])},
                    cache_dirpath=cache_dir,
                    **kwargs)

    if train_mode:
        split_train, split_valid = dispatchers
        price_features = _build('price_features', split_train, Dummy())
        # The valid step shares the train step's (fitted) transformer.
        price_features_valid = _build('price_features_valid', split_valid,
                                      price_features)
        return price_features, price_features_valid

    return _build('price_features', dispatchers, Dummy())
# Example no. 26
def postprocessing__pipeline_simplified(cache_dirpath):
    """Simplified postprocessing: resize -> map categories -> label -> score.

    Consumes 'unet_output' predictions and 'callback_input' target sizes and
    returns an output step exposing scored label images as ``y_pred``.
    """
    mask_resize = Step(name='mask_resize',
                       transformer=post.Resizer(),
                       input_data=['unet_output', 'callback_input'],
                       adapter={'images': ([('unet_output', 'multichannel_map_prediction')]),
                                'target_sizes': ([('callback_input', 'target_sizes')])},
                       cache_dirpath=cache_dirpath)

    category_mapper = Step(name='category_mapper',
                           transformer=post.CategoryMapper(),
                           input_steps=[mask_resize],
                           adapter={'images': ([('mask_resize', 'resized_images')])},
                           cache_dirpath=cache_dirpath)

    labeler = Step(name='labeler',
                   transformer=post.MulticlassLabeler(),
                   input_steps=[category_mapper],
                   adapter={'images': ([(category_mapper.name, 'categorized_images')])},
                   cache_dirpath=cache_dirpath)

    # Attach per-label probabilities from the resized maps to the labels.
    score_builder = Step(name='score_builder',
                         transformer=post.ScoreBuilder(),
                         input_steps=[labeler, mask_resize],
                         adapter={'images': ([(labeler.name, 'labeled_images')]),
                                  'probabilities': ([(mask_resize.name, 'resized_images')])},
                         cache_dirpath=cache_dirpath)

    return Step(name='output',
                transformer=Dummy(),
                input_steps=[score_builder],
                adapter={'y_pred': ([(score_builder.name, 'images_with_scores')])},
                cache_dirpath=cache_dirpath)
# Example no. 27
def word2vec_dpcnn(config, is_train):
    """Build the word2vec-embedding DPCNN pipeline (train or inference).

    The original duplicated the entire Step construction in both branches,
    differing only in ``validation_data`` and ``overwrite_transformer``;
    those extras are now added conditionally to a single construction
    (dict insertion order matches the original train-mode adapter).
    """
    preprocessed_input = _preprocessing(config, is_train)
    word_tokenizer = _word_tokenizer(preprocessed_input, config, is_train)
    word2vec_embeddings = _word2vec_embeddings(word_tokenizer, config)

    # NOTE(review): 'y' is wired from 'cleaning_output' even at inference
    # time, mirroring the original code — confirm labels exist in that mode.
    adapter = {
        'X': ([('word_tokenizer', 'X')]),
        'y': ([('cleaning_output', 'y')]),
        'embedding_matrix': ([('word2vec_embeddings', 'embeddings_matrix')]),
    }
    extra_kwargs = {}
    if is_train:
        # Training additionally feeds validation pairs (zipped into tuples)
        # and forces the transformer to be retrained from scratch.
        adapter['validation_data'] = ([('word_tokenizer', 'X_valid'),
                                       ('cleaning_output', 'y_valid')],
                                      to_tuple_inputs)
        extra_kwargs['overwrite_transformer'] = True

    word2vec_dpcnn = Step(name='word2vec_dpcnn',
                          transformer=WordDPCNN(**config.dpcnn_network),
                          input_steps=[word_tokenizer, preprocessed_input,
                                       word2vec_embeddings],
                          adapter=adapter,
                          cache_dirpath=config.env.cache_dirpath,
                          **extra_kwargs)

    output = Step(name='word2vec_dpcnn_output',
                  transformer=Dummy(),
                  input_steps=[word2vec_dpcnn],
                  adapter={'y_pred':
                           ([('word2vec_dpcnn', 'prediction_probability')])},
                  cache_dirpath=config.env.cache_dirpath)
    return output
# Example no. 28
def unet_inference(config):
    """Inference pipeline for the U-Net segmentation model.

    Splits metadata, loads images, runs the network, resizes predicted masks
    to their target sizes, thresholds them, and returns the binarized masks
    as ``y_pred``.
    """
    cache_dir = config.env.cache_dirpath

    split_step = Step(name='xy_inference',
                      transformer=XYSplit(**config.xy_splitter),
                      input_data=['input'],
                      adapter={'meta': ([('input', 'meta')]),
                               'train_mode': ([('input', 'train_mode')])},
                      cache_dirpath=cache_dir)

    # NOTE(review): the same split step is listed twice, matching the
    # train-mode loader's two-step shape — presumably intentional; confirm.
    loader_step = Step(name='loader',
                       transformer=MetadataImageSegmentationLoader(**config.loader),
                       input_data=['input'],
                       input_steps=[split_step, split_step],
                       adapter={'X': ([('xy_inference', 'X')], squeeze_inputs),
                                'y': ([('xy_inference', 'y')], squeeze_inputs),
                                'train_mode': ([('input', 'train_mode')])},
                       cache_dirpath=cache_dir)

    network = Step(name='unet_network',
                   transformer=PyTorchUNet(**config.unet_network),
                   input_steps=[loader_step],
                   cache_dirpath=cache_dir)

    resized = Step(name='mask_resize',
                   transformer=Resizer(),
                   input_data=['input'],
                   input_steps=[network],
                   adapter={'images': ([('unet_network', 'predicted_masks')]),
                            'target_sizes': ([('input', 'target_sizes')])},
                   cache_dirpath=cache_dir)

    binarized = Step(name='thresholding',
                     transformer=Thresholder(**config.thresholder),
                     input_steps=[resized],
                     adapter={'images': ([('mask_resize', 'resized_images')])},
                     cache_dirpath=cache_dir)

    return Step(name='output',
                transformer=Dummy(),
                input_steps=[binarized],
                adapter={'y_pred': ([('thresholding', 'binarized_images')])},
                cache_dirpath=cache_dir)
# Example no. 29
def baseline(config, train_mode):
    """Baseline pipeline: v0 feature extraction feeding a LightGBM classifier."""
    if train_mode:
        train_features, valid_features = feature_extraction_v0(config, train_mode)
        light_gbm = classifier_lgbm((train_features, valid_features), config,
                                    train_mode)
    else:
        light_gbm = classifier_lgbm(feature_extraction_v0(config, train_mode),
                                    config, train_mode)

    return Step(name='output',
                transformer=Dummy(),
                input_steps=[light_gbm],
                adapter={'y_pred': ([(light_gbm.name, 'prediction')])},
                cache_dirpath=config.env.cache_dirpath)
# Example no. 30
def fasttext_lstm(config, is_train):
    """Build the fastText-embedding CuDNN LSTM pipeline (train or inference).

    The original duplicated the entire Step construction in both branches,
    differing only in ``validation_data`` and ``overwrite_transformer``;
    those extras are now added conditionally to a single construction
    (dict insertion order matches the original train-mode adapter).
    """
    preprocessed_input = _preprocessing(config, is_train)
    word_tokenizer = _word_tokenizer(preprocessed_input, config, is_train)
    fasttext_embeddings = _fasttext_embeddings(word_tokenizer, config)

    # NOTE(review): 'y' is wired from 'cleaning_output' even at inference
    # time, mirroring the original code — confirm labels exist in that mode.
    adapter = {
        'X': ([('word_tokenizer', 'X')]),
        'y': ([('cleaning_output', 'y')]),
        'embedding_matrix': ([('fasttext_embeddings', 'embeddings_matrix')]),
    }
    extra_kwargs = {}
    if is_train:
        # Training additionally feeds validation pairs (zipped into tuples)
        # and forces the transformer to be retrained from scratch.
        adapter['validation_data'] = ([('word_tokenizer', 'X_valid'),
                                       ('cleaning_output', 'y_valid')],
                                      to_tuple_inputs)
        extra_kwargs['overwrite_transformer'] = True

    fasttext_lstm = Step(name='fasttext_lstm',
                         transformer=WordCuDNNLSTM(**config.lstm_network),
                         input_steps=[word_tokenizer, preprocessed_input,
                                      fasttext_embeddings],
                         adapter=adapter,
                         cache_dirpath=config.env.cache_dirpath,
                         **extra_kwargs)

    output = Step(name='fasttext_lstm_output',
                  transformer=Dummy(),
                  input_steps=[fasttext_lstm],
                  adapter={'y_pred':
                           ([('fasttext_lstm', 'prediction_probability')])},
                  cache_dirpath=config.env.cache_dirpath)
    return output